From e0232aa02db5b2f0749aa11860b3a6e175f16a1c Mon Sep 17 00:00:00 2001 From: Andrew Kesterson Date: Sun, 27 Oct 2013 18:33:30 -0400 Subject: [PATCH] Working on #1 - FDA import process works. 3 hours for first import, 1.25 hours on subsequent updates. --- alembic/env.py | 150 +++++++++-------- .../versions/2b64ad923738_initial_schema.py | 158 ++++++++++++++++++ .../versions/5ac93692b0ab_initial_revision.py | 125 ++++++++++++++ mercy/config.py | 1 + mercy/exceptions.py | 3 + mercy/importers/__init__.py | 0 mercy/importers/fda.py | 8 + mercy/models/__init__.py | 3 + mercy/models/drugbank.py | 69 ++++++++ mercy/models/fda.py | 37 ++++ mercy/models/simplemodel.py | 26 +++ scripts/mercy-import-fda | 10 ++ tests/importers/test_fda_importer.py | 56 +++++++ 13 files changed, 575 insertions(+), 71 deletions(-) create mode 100644 alembic/versions/2b64ad923738_initial_schema.py create mode 100644 alembic/versions/5ac93692b0ab_initial_revision.py create mode 100644 mercy/config.py create mode 100644 mercy/exceptions.py create mode 100644 mercy/importers/__init__.py create mode 100644 mercy/importers/fda.py create mode 100644 mercy/models/__init__.py create mode 100644 mercy/models/drugbank.py create mode 100644 mercy/models/fda.py create mode 100644 mercy/models/simplemodel.py create mode 100644 scripts/mercy-import-fda create mode 100644 tests/importers/test_fda_importer.py diff --git a/alembic/env.py b/alembic/env.py index f72400b..724fdf4 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -1,71 +1,79 @@ -from __future__ import with_statement -from alembic import context -from sqlalchemy import engine_from_config, pool -from logging.config import fileConfig - -# this is the Alembic Config object, which provides -# access to the values within the .ini file in use. -config = context.config - -# Interpret the config file for Python logging. -# This line sets up loggers basically. -fileConfig(config.config_file_name) - -# add your model's MetaData object here -# for 'autogenerate' support -# from myapp import mymodel -# target_metadata = mymodel.Base.metadata -target_metadata = None - -# other values from the config, defined by the needs of env.py, -# can be acquired: -# my_important_option = config.get_main_option("my_important_option") -# ... etc. - -def run_migrations_offline(): - """Run migrations in 'offline' mode. - - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. - - Calls to context.execute() here emit the given string to the - script output. - - """ - url = config.get_main_option("sqlalchemy.url") - context.configure(url=url) - - with context.begin_transaction(): - context.run_migrations() - -def run_migrations_online(): - """Run migrations in 'online' mode. - - In this scenario we need to create an Engine - and associate a connection with the context. - - """ - engine = engine_from_config( - config.get_section(config.config_ini_section), - prefix='sqlalchemy.', - poolclass=pool.NullPool) - - connection = engine.connect() - context.configure( - connection=connection, - target_metadata=target_metadata - ) - - try: - with context.begin_transaction(): - context.run_migrations() - finally: - connection.close() - -if context.is_offline_mode(): - run_migrations_offline() -else: - run_migrations_online() - +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig +import mercy.MercyApplication +import mercy.models +import mercy.config + +db = mercy.MercyApplication.get_db() +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = db.Model.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure(url=url) + + with context.begin_transaction(): + context.run_migrations() + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + + alembic_config = config.get_section(config.config_ini_section) + alembic_config['sqlalchemy.url'] = mercy.config.SQLALCHEMY_URI + + engine = engine_from_config( + config.get_section(config.config_ini_section), + prefix='sqlalchemy.', + poolclass=pool.NullPool) + + connection = engine.connect() + context.configure( + connection=connection, + target_metadata=target_metadata + ) + + try: + with context.begin_transaction(): + context.run_migrations() + finally: + connection.close() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() + diff --git a/alembic/versions/2b64ad923738_initial_schema.py b/alembic/versions/2b64ad923738_initial_schema.py new file mode 100644 index 0000000..6984af4 --- /dev/null +++ b/alembic/versions/2b64ad923738_initial_schema.py @@ -0,0 +1,158 @@ +"""Initial schema + +Revision ID: 2b64ad923738 +Revises: None +Create Date: 2013-10-27 11:46:11.475707 + +""" + +# revision identifiers, used by Alembic. +revision = '2b64ad923738' +down_revision = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.create_table('fda_products', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('productid', sa.String(), index=True, unique=True, nullable=False), + sa.Column('ndc', sa.String(), index=True, nullable=False), + sa.Column('type', sa.String(), nullable=False), + sa.Column('proprietaryName', sa.String(), nullable=False), + sa.Column('proprietaryNameSuffix', sa.String(), nullable=True), + sa.Column('genericName', sa.String(), nullable=False), + sa.Column('marketingCategoryName', sa.String(), nullable=False), + sa.Column('labelerName', sa.String(), nullable=False), + sa.Column('deaSchedule', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_packagers', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('url', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) + op.create_table('drugbank_manufacturers', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) + op.create_table('fda_pharma_classes', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) + op.create_table('fda_product_substances', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_categories', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) + op.create_table('fda_pharma_class_maps', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('product_id', sa.Integer(), nullable=False), + sa.Column('pharma_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['pharma_id'], ['fda_pharma_classes.id'], ), + sa.ForeignKeyConstraint(['product_id'], ['fda_products.id'], ), + sa.PrimaryKeyConstraint('id', 'pharma_id') + ) + op.create_table('drugbank_drugs', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('dbid', sa.String(), unique=True, nullable=True), + sa.Column('name', sa.String(), nullable=False), + sa.Column('indication', sa.String(), nullable=False), + sa.Column('fda_product_id', sa.String(), nullable=True), + sa.Column('wikipedia', sa.String(), nullable=True), + sa.ForeignKeyConstraint(['fda_product_id'], ['fda_products.productid'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('fda_product_substance_map', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('product_id', sa.Integer(), nullable=False), + sa.Column('substance_id', sa.Integer(), nullable=False), + sa.Column('quantity', sa.Float(), nullable=False), + sa.Column('units', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['product_id'], ['fda_products.id'], ), + sa.ForeignKeyConstraint(['substance_id'], ['fda_product_substances.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_synonyms', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_packager_maps', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('packager_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.ForeignKeyConstraint(['packager_id'], ['drugbank_packagers.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_genericnames', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_prices', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('description', sa.String(), nullable=False), + sa.Column('currency', sa.String(), nullable=False), + sa.Column('cost', sa.Float(), nullable=False), + sa.Column('unit', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_manufacturer_maps', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('manufacturer_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.ForeignKeyConstraint(['manufacturer_id'], ['drugbank_manufacturers.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_category_maps', + sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True, nullable=False), + sa.Column('drug_id', sa.Integer(), nullable=False), + sa.Column('category_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['category_id'], ['drugbank_categories.id'], ), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + ### end Alembic commands ### + + +def downgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.drop_table('drugbank_category_maps') + op.drop_table('drugbank_manufacturer_maps') + op.drop_table('drugbank_prices') + op.drop_table('drugbank_genericnames') + op.drop_table('drugbank_packager_maps') + op.drop_table('drugbank_synonyms') + op.drop_table('fda_product_substance_map') + op.drop_table('drugbank_drugs') + op.drop_table('fda_pharma_class_maps') + op.drop_table('drugbank_categories') + op.drop_table('fda_product_substances') + op.drop_table('fda_pharma_classes') + op.drop_table('fda_products') + op.drop_table('drugbank_manufacturers') + op.drop_table('drugbank_packagers') + ### end Alembic commands ### diff --git a/alembic/versions/5ac93692b0ab_initial_revision.py b/alembic/versions/5ac93692b0ab_initial_revision.py new file mode 100644 index 0000000..38c4999 --- /dev/null +++ b/alembic/versions/5ac93692b0ab_initial_revision.py @@ -0,0 +1,125 @@ +"""Initial revision + +Revision ID: 5ac93692b0ab +Revises: None +Create Date: 2013-10-26 13:25:27.853595 + +""" + +# revision identifiers, used by Alembic. +revision = '5ac93692b0ab' +down_revision = None + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +def upgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.create_table('drugbank_manufacturers', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_packagers', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('url', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_categories', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('fda_products', + sa.Column('id', sa.String(), nullable=False), + sa.Column('ndc', sa.String(), nullable=False), + sa.Column('type', sa.String(), nullable=False), + sa.Column('proprietaryName', sa.String(), nullable=False), + sa.Column('proprietaryNameSuffix', sa.String(), nullable=True), + sa.Column('genericName', sa.String(), nullable=False), + sa.Column('marketingCategoryName', sa.String(), nullable=False), + sa.Column('labelerName', sa.String(), nullable=False), + sa.Column('deaSchedule', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('drugbank_drugs', + sa.Column('id', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('indication', sa.String(), nullable=False), + sa.Column('ndc_id', sa.String(), nullable=True), + sa.Column('wikipedia', sa.String(), nullable=True), + sa.ForeignKeyConstraint(['ndc_id'], ['fda_products.id'], ), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('id') + ) + op.create_table('fda_product_substances', + sa.Column('fda_product_id', sa.String(), nullable=False), + sa.Column('substanceName', sa.String(), nullable=False), + sa.Column('strengthNumber', sa.Float(), nullable=False), + sa.Column('strengthUnit', sa.String(), nullable=False), + sa.Column('pharmaClasses', postgresql.ARRAY(sa.String()), nullable=False), + sa.ForeignKeyConstraint(['fda_product_id'], ['fda_products.id'], ), + sa.PrimaryKeyConstraint('fda_product_id') + ) + op.create_table('drugbank_synonyms', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_packager_maps', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('packager_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.ForeignKeyConstraint(['packager_id'], ['drugbank_packagers.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_genericnames', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_prices', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('description', sa.String(), nullable=False), + sa.Column('currency', sa.String(), nullable=False), + sa.Column('cost', sa.Float(), nullable=False), + sa.Column('unit', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_manufacturer_maps', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('manufacturer_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.ForeignKeyConstraint(['manufacturer_id'], ['drugbank_manufacturers.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + op.create_table('drugbank_category_maps', + sa.Column('drug_id', sa.String(), nullable=False), + sa.Column('category_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['category_id'], ['drugbank_categories.id'], ), + sa.ForeignKeyConstraint(['drug_id'], ['drugbank_drugs.id'], ), + sa.PrimaryKeyConstraint('drug_id') + ) + ### end Alembic commands ### + + +def downgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.drop_table('drugbank_category_maps') + op.drop_table('drugbank_manufacturer_maps') + op.drop_table('drugbank_prices') + op.drop_table('drugbank_genericnames') + op.drop_table('drugbank_packager_maps') + op.drop_table('drugbank_synonyms') + op.drop_table('fda_product_substances') + op.drop_table('drugbank_drugs') + op.drop_table('fda_products') + op.drop_table('drugbank_categories') + op.drop_table('drugbank_packagers') + op.drop_table('drugbank_manufacturers') + ### end Alembic commands ### diff --git a/mercy/config.py b/mercy/config.py new file mode 100644 index 0000000..9e9c6ff --- /dev/null +++ b/mercy/config.py @@ -0,0 +1 @@ +SQLALCHEMY_URI = 'postgresql://mercy:mercy@postgresql.aklabs.net/mercy' diff --git a/mercy/exceptions.py b/mercy/exceptions.py new file mode 100644 index 0000000..1586648 --- /dev/null +++ b/mercy/exceptions.py @@ -0,0 +1,3 @@ +class CorruptTarError(Exception): + pass + diff --git a/mercy/importers/__init__.py b/mercy/importers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mercy/importers/fda.py b/mercy/importers/fda.py new file mode 100644 index 0000000..74a0a1d --- /dev/null +++ b/mercy/importers/fda.py @@ -0,0 +1,8 @@ +import mercy.db + +class FDAImporter: + def __init__(self, *args, **kwargs): + self.__database = mercy.db.Database() + + def read(self, fname): + raise Exception("FDAImporter.read doesn't do anything yet") diff --git a/mercy/models/__init__.py b/mercy/models/__init__.py new file mode 100644 index 0000000..6d227f6 --- /dev/null +++ b/mercy/models/__init__.py @@ -0,0 +1,3 @@ +import simplemodel +import fda +import drugbank diff --git a/mercy/models/drugbank.py b/mercy/models/drugbank.py new file mode 100644 index 0000000..949bdee --- /dev/null +++ b/mercy/models/drugbank.py @@ -0,0 +1,69 @@ +import sqlalchemy as sa +from mercy.models.simplemodel import SimpleModel +import mercy.MercyApplication +import sqlalchemy.dialects.postgresql as pgdialect + +db = mercy.MercyApplication.get_db() + +class Drug(SimpleModel, db.Model): + __tablename__ = "drugbank_drugs" + + id = sa.Column(sa.String, primary_key=True, unique=True) + name = sa.Column(sa.String, nullable=False, index=True) + indication = sa.Column(sa.String, nullable=False) + ndc_id = sa.Column(sa.String, sa.ForeignKey('fda_products.id'), nullable=True) + wikipedia = sa.Column(sa.String, nullable=True) + + __repr_keys__ = { 'id': basestring, + 'name': basestring, + 'ndc_id': basestring + } + +class Price(SimpleModel, db.Model): + __tablename__ = "drugbank_prices" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + description = sa.Column(sa.String, nullable=False) + currency = sa.Column(sa.String, nullable=False) + cost = sa.Column(sa.Float, nullable=False, index=True) + unit = sa.Column(sa.String, nullable=False) + +class CategoryName(SimpleModel, db.Model): + __tablename__ = "drugbank_categories" + id = sa.Column(sa.Integer, primary_key=True, autoincrement=True, nullable=False) + name = sa.Column(sa.String, nullable=False) + +class CategoryMap(SimpleModel, db.Model): + __tablename__ = "drugbank_category_maps" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + category_id = sa.Column(sa.Integer, sa.ForeignKey(CategoryName.id), nullable=False) + +class Packager(SimpleModel, db.Model): + __tablename__ = "drugbank_packagers" + id = sa.Column(sa.Integer, primary_key=True, autoincrement=True, nullable=False) + name = sa.Column(sa.String, nullable=False) + url = sa.Column(sa.String, nullable=True) + +class PackagerMap(SimpleModel, db.Model): + __tablename__ = "drugbank_packager_maps" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + packager_id = sa.Column(sa.Integer, sa.ForeignKey(Packager.id), nullable=False) + +class Manufacturer(SimpleModel, db.Model): + __tablename__ = "drugbank_manufacturers" + id = sa.Column(sa.Integer, primary_key=True, autoincrement=True, nullable=False) + name = sa.Column(sa.String, nullable=False) + +class ManufacturerMap(SimpleModel, db.Model): + __tablename__ = "drugbank_manufacturer_maps" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + manufacturer_id = sa.Column(sa.Integer, sa.ForeignKey(Manufacturer.id), nullable=False) + +class GenericName(SimpleModel, db.Model): + __tablename__ = "drugbank_genericnames" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + name = sa.Column(sa.String, nullable=False) + +class Synonym(SimpleModel, db.Model): + __tablename__ = "drugbank_synonyms" + drug_id = sa.Column(sa.String, sa.ForeignKey(Drug.id), primary_key=True, nullable=False) + name = sa.Column(sa.String, nullable=False) diff --git a/mercy/models/fda.py b/mercy/models/fda.py new file mode 100644 index 0000000..dd2c0f3 --- /dev/null +++ b/mercy/models/fda.py @@ -0,0 +1,37 @@ +import sqlalchemy as sa +from mercy.models.simplemodel import SimpleModel +import mercy.MercyApplication +import sqlalchemy.dialects.postgresql as pgdialect + +db = mercy.MercyApplication.get_db() + +class Product(SimpleModel, db.Model): + __tablename__ = 'fda_products' + + id = sa.Column(sa.String, primary_key=True) + ndc = sa.Column(sa.String, nullable=False) + type = sa.Column(sa.String, nullable=False) + proprietaryName = sa.Column(sa.String, nullable=False, index=True) + proprietaryNameSuffix = sa.Column(sa.String) + genericName = sa.Column(sa.String, nullable=False, index=True) + marketingCategoryName = sa.Column(sa.String, nullable=False) + labelerName = sa.Column(sa.String, nullable=False) + deaSchedule = sa.Column(sa.String, nullable=False) + + __repr_keys__ = { 'id': basestring, + 'ndc': basestring, + 'genericName': basestring, + 'proprietaryName': basestring, + 'proprietaryNameSuffix': basestring} + +class ProductSubstance(SimpleModel, db.Model): + __tablename__ = 'fda_product_substances' + + fda_product_id = sa.Column(sa.String, + sa.ForeignKey(Product.id), + primary_key=True, + nullable=False) + substanceName = sa.Column(sa.String, nullable=False) + strengthNumber = sa.Column(sa.Float, nullable=False) + strengthUnit = sa.Column(sa.String, nullable=False) + pharmaClasses = sa.Column(pgdialect.ARRAY(sa.String), nullable=False) diff --git a/mercy/models/simplemodel.py b/mercy/models/simplemodel.py new file mode 100644 index 0000000..834596a --- /dev/null +++ b/mercy/models/simplemodel.py @@ -0,0 +1,26 @@ +import mercy.MercyApplication + +db = mercy.MercyApplication.get_db() + +class SimpleModel(): + def __init__(self, *args, **kwargs): + db.Model.__init__(self, *args, **kwargs) + for (k, v) in kwargs.iteritems(): + if hasattr(self, k): + setattr(self, k, v) + else: + raise AttributeError("Invalid attribute {} => {}".format(k, v)) + + def __repr__(self, *args, **kwargs): + try: + getattr(self.__class__, "__repr_keys__") + except AttributeError, e: + return db.Model.__repr__(self, *args, **kwargs) + + values = [] + for (name, otype) in self.__class__.__repr_keys__.iteritems(): + if otype == basestring: + values += "'{}'".format(str(getattr(self.__class__, name))) + else: + values += str(getattr(self.__class__, name)) + return "<{}({})>".format(self.__class__.__name__, ', '.join(values)) diff --git a/scripts/mercy-import-fda b/scripts/mercy-import-fda new file mode 100644 index 0000000..750c515 --- /dev/null +++ b/scripts/mercy-import-fda @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import sys +import mercy.importers.fda + +startIdx = 0 +if ( len(sys.argv) >= 3 ): + startIdx = int(sys.argv[2]) + +sys.exit(mercy.importers.fda.FDAImporter().read(sys.argv[1], startIdx=startIdx)) diff --git a/tests/importers/test_fda_importer.py b/tests/importers/test_fda_importer.py new file mode 100644 index 0000000..0e58c48 --- /dev/null +++ b/tests/importers/test_fda_importer.py @@ -0,0 +1,56 @@ +import os +import nose +from nose.tools import raises +import mercy.models +import mercy.importers.fda +import mercy.exceptions + +VALID_ROWS=[] + +FIXTUREFILE=os.path.abspath( + os.path.join( + __file__, + "..", + "fixtures", + "fda_database.tar.gz" + ) + ) + +FIXTUREFILE_BAD=os.path.abspath( + os.path.join( + __file__, + "..", + "fixtures", + "fda_database_bad.tar.gz" + ) + ) + +FIXTUREFILE_CORRUPT=os.path.abspath( + os.path.join( + __file__, + "..", + "fixtures", + "fda_database_corrupt.tar.gz" + ) + ) + +@raises(mercy.exceptions.CorruptTarError) +def test_fda_import_fails_on_corrupt_tar(): + importer = mercy.importers.fda.FDAImporter() + impoter.read(FIXTUREFILE_CORRUPT) + +def test_fda_import_populates_table(): + importer = FDAImporter().read(FIXTUREFILE) + rows = mercy.models.fda.Product.query.all() + for i in range(0, len(rows)): + row = rows[i] + canned_row = CANNED_ROWS[i] + assert(len(row) == len(canned_row)) + for j in canned_row.keys(): + assert(row[j] == canned_row[j]) + +@raises(AttributeError, KeyError, ValueError) +def test_fda_import_rejects_bad_records: + importer = mercy.importers.fda.FDAImporter() + importer.read(FIXTUREFILE_BAD) +