More work on the DrugBank importer; finally got the XPath queries figured out

This commit is contained in:
2013-12-09 21:06:10 -05:00
parent b20dc5acab
commit 00a6a4da74
5 changed files with 43 additions and 4 deletions

View File

@@ -0,0 +1,31 @@
import mercy.MercyApplication
import xml.dom.pulldom
from mercy.models.drugbank import *
import sqlalchemy.exc
import xpath
class DrugBankImporter:
    """Reads a DrugBank XML file and builds ``Drug`` records from its ``<drug>`` elements."""

    def __init__(self, *args, **kwargs):
        """Grab the database handle from ``mercy.MercyApplication.get_db()``.

        Positional and keyword arguments are accepted but not used.
        """
        self.__db = mercy.MercyApplication.get_db()

    def _saveobj(self, obj):
        """Add *obj* to the database session and commit right away."""
        session = self.__db.session
        session.add(obj)
        # Re-read the session attribute for the commit, as each call did before.
        self.__db.session.commit()

    def read(self, fname):
        """Pull-parse *fname* and convert every ``<drug>`` start element found.

        Each matching element is expanded into a full DOM subtree before it
        is handed to the converter. Returns ``None``.
        """
        stream = xml.dom.pulldom.parse(fname)
        for kind, element in stream:
            # Skip anything that is not the opening tag of a <drug> element.
            if kind != xml.dom.pulldom.START_ELEMENT or element.tagName != 'drug':
                continue
            stream.expandNode(element)
            self.__convert_drug(element)

    def __convert_drug(self, node):
        """Populate a ``Drug`` from *node* and print it.

        Nothing is printed when no FDA product id could be extracted.
        """
        # (attribute on Drug, XPath expression evaluated relative to the node)
        field_queries = (
            ('name', 'name'),
            ('indication', 'indication'),
            ('fda_product_id',
             'external-identifiers/external-identifier[starts-with(resource, "National Drug Code Directory")]/identifier'),
            ('wikipedia',
             'external-links/external-link[starts-with(resource, "Wikipedia")]/url'),
        )
        record = Drug()
        for attribute, expression in field_queries:
            setattr(record, attribute, xpath.findvalue(expression, node))
        if record.fda_product_id:
            # Persisting through self._saveobj(record) is not enabled yet;
            # for now the record is only printed.
            print(str(record))

View File

@@ -18,7 +18,8 @@ class Drug(SimpleModel, db.Model):
__repr_keys__ = { 'id': basestring, __repr_keys__ = { 'id': basestring,
'name': basestring, 'name': basestring,
'ndc_id': basestring 'fda_product_id': basestring,
'wikipedia': basestring
} }
class Price(SimpleModel, db.Model): class Price(SimpleModel, db.Model):

View File

@@ -20,7 +20,7 @@ class SimpleModel():
values = [] values = []
for (name, otype) in self.__class__.__repr_keys__.iteritems(): for (name, otype) in self.__class__.__repr_keys__.iteritems():
if otype == basestring: if otype == basestring:
values += "'{}'".format(str(getattr(self.__class__, name))) values.append("'{}'".format(str(getattr(self, name))))
else: else:
values += str(getattr(self.__class__, name)) values.append(str(getattr(self, name)))
return "<{}({})>".format(self.__class__.__name__, ', '.join(values)) return "<{}({})>".format(self.__class__.__name__, ', '.join(values))

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env python
import sys
import mercy.importers.drugbank
# Entry point: import the DrugBank XML file named by the first command-line
# argument. Extra arguments are ignored, as before.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, exit status 1)
        # instead of an IndexError traceback when the path is missing.
        sys.exit("usage: {} DRUGBANK_XML_FILE".format(sys.argv[0]))
    sys.exit(mercy.importers.drugbank.DrugBankImporter().read(sys.argv[1]))

View File

@@ -17,7 +17,8 @@ if __name__ == "__main__":
"sqlalchemy", "sqlalchemy",
"alembic", "alembic",
"psycopg2", "psycopg2",
"flask-sqlalchemy"], "flask-sqlalchemy",
"py-dom-xpath"],
scripts=["scripts/mercy.wsgi"], scripts=["scripts/mercy.wsgi"],
packages=["mercy", packages=["mercy",
"mercy/models", "mercy/models",