From 94acd5e84ec4f5176fafe3d4748bd1e685a90e4f Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 7 Jan 2024 21:12:05 -0500 Subject: [PATCH] rename --- README.md | 4 ++-- docs/scripts/generate_normalize_figure.py | 4 ++-- docs/source/index.rst | 4 ++-- docs/source/usage.rst | 6 +++--- src/gene/cli.py | 14 +++++++------- src/gene/database/__init__.py | 12 ++++++++++-- src/gene/database/database.py | 16 ++++++++-------- src/gene/database/dynamodb.py | 4 ++-- src/gene/database/postgresql.py | 4 ++-- src/gene/etl/update.py | 8 ++++---- src/gene/main.py | 4 ++-- src/gene/query.py | 22 +++++++++++----------- tests/conftest.py | 4 ++-- tests/unit/test_emit_warnings.py | 4 ++-- 14 files changed, 59 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index f6202147..81ced2ea 100644 --- a/README.md +++ b/README.md @@ -57,9 +57,9 @@ $ curl 'https://normalize.cancervariants.org/gene/normalize?q=BRAF' | python -m Or utilize the [Python API](https://gene-normalizer.readthedocs.io/en/latest/api/query_api.html) for fast access: ```python ->>> from gene.database import create_db +>>> from gene.database import get_db >>> from gene.query import QueryHandler ->>> q = QueryHandler(create_db()) +>>> q = QueryHandler(get_db()) >>> result = q.normalize("KRAS") >>> result.normalized_id 'hgnc:6407' diff --git a/docs/scripts/generate_normalize_figure.py b/docs/scripts/generate_normalize_figure.py index 54696855..0f6862b5 100644 --- a/docs/scripts/generate_normalize_figure.py +++ b/docs/scripts/generate_normalize_figure.py @@ -12,7 +12,7 @@ import gravis as gv -from gene.database import create_db +from gene.database import get_db from gene.etl.base import APP_ROOT from gene.query import QueryHandler from gene.schemas import UnmergedNormalizationService @@ -80,7 +80,7 @@ def create_gjgf(result: UnmergedNormalizationService) -> Dict: def gen_norm_figure() -> None: """Generate normalized graph figure for docs.""" - q = QueryHandler(create_db()) + q = QueryHandler(get_db()) 
otx2p1 = "OTX2P1" otx2p2 = "OTX2P2" diff --git a/docs/source/index.rst b/docs/source/index.rst index d64bbe4d..8e7534b2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -30,8 +30,8 @@ The Gene Normalizer can also be installed locally as a Python package for fast a .. code-block:: pycon >>> from gene.query import QueryHandler - >>> from gene.database import create_db - >>> q = QueryHandler(create_db()) + >>> from gene.database import get_db + >>> q = QueryHandler(get_db()) >>> result = q.normalize("BRAF") >>> result.normalized_id 'hgnc:1097' diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 8ba4ec96..810a91c7 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -28,9 +28,9 @@ Each search mode can be accessed directly within Python using the :ref:`query AP .. code-block:: pycon - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.query import QueryHandler - >>> q = QueryHandler(create_db()) + >>> q = QueryHandler(get_db()) >>> normalized_response = q.normalize('HER2') >>> normalized_response >>> normalized_response.match_type @@ -38,7 +38,7 @@ Each search mode can be accessed directly within Python using the :ref:`query AP >>> normalized_response.gene.label 'ERBB2' -Critically, the ``QueryHandler`` class must receive a database interface instance as its first argument. The most straightforward way to construct a database instance, as demonstrated above, is with the :py:meth:`create_db() <gene.database.database.create_db>` method. This method tries to build a database connection based on a number of conditions, which are resolved in the following order: +Critically, the ``QueryHandler`` class must receive a database interface instance as its first argument. The most straightforward way to construct a database instance, as demonstrated above, is with the :py:meth:`get_db() <gene.database.database.get_db>` method.
This method tries to build a database connection based on a number of conditions, which are resolved in the following order: 1) if environment variable ``GENE_NORM_ENV`` is set to a value, or if the ``aws_instance`` method argument is True, try to create a cloud DynamoDB connection 2) if the ``db_url`` method argument is given a non-None value, try to create a DB connection to that address (if it looks like a PostgreSQL URL, create a PostgreSQL connection, but otherwise try DynamoDB) diff --git a/src/gene/cli.py b/src/gene/cli.py index b3ef3ee5..f7b84c0e 100644 --- a/src/gene/cli.py +++ b/src/gene/cli.py @@ -5,7 +5,7 @@ import click -from gene.database import create_db +from gene.database import get_db from gene.database.database import DatabaseError from gene.etl.update import update_all_sources, update_normalized, update_source from gene.schemas import SourceName @@ -79,7 +79,7 @@ def update( click.echo(ctx.get_help()) ctx.exit(1) - db = create_db(db_url, aws_instance, silent) + db = get_db(db_url, aws_instance, silent) processed_ids = None if all: @@ -129,7 +129,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No click.get_current_context().exit() if not data_url: data_url = os.environ.get("GENE_NORM_REMOTE_DB_URL") - db = create_db(db_url, False, silent) + db = get_db(db_url, False, silent) try: db.load_from_remote(data_url) except NotImplementedError: @@ -165,8 +165,8 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None: This command is equivalent to the combination of the database classes' ``check_schema_initialized()`` and ``check_tables_populated()`` methods: - >>> from gene.database import create_db - >>> db = create_db() + >>> from gene.database import get_db + >>> db = get_db() >>> db.check_schema_initialized() and db.check_tables_populated() True # DB passes checks @@ -175,7 +175,7 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None: :param verbose: if true, print result to console :param 
silent: if True, suppress console output """ # noqa: D301 - db = create_db(db_url, False, silent) + db = get_db(db_url, False, silent) if not db.check_schema_initialized(): if verbose: click.echo("Health check failed: DB schema uninitialized.") @@ -212,7 +212,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None: if not output_directory: output_directory = Path(".") - db = create_db(db_url, False, silent) + db = get_db(db_url, False, silent) try: db.export_db(output_directory) except NotImplementedError: diff --git a/src/gene/database/__init__.py b/src/gene/database/__init__.py index b131c435..d69cf8cb 100644 --- a/src/gene/database/__init__.py +++ b/src/gene/database/__init__.py @@ -1,10 +1,18 @@ """Provide database clients.""" from .database import ( AWS_ENV_VAR_NAME, - AbstractDatabase, DatabaseError, DatabaseInitializationError, DatabaseReadError, DatabaseWriteError, - create_db, + get_db, ) + +__all__ = [ + "AWS_ENV_VAR_NAME", + "DatabaseError", + "DatabaseInitializationError", + "DatabaseReadError", + "DatabaseWriteError", + "get_db", +] diff --git a/src/gene/database/database.py b/src/gene/database/database.py index bfe4179a..6907f839 100644 --- a/src/gene/database/database.py +++ b/src/gene/database/database.py @@ -152,9 +152,9 @@ def get_all_records(self, record_type: RecordType) -> Generator[Dict, None, None For example, - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.schemas import RecordType - >>> db = create_db() + >>> db = get_db() >>> for record in db.get_all_records(RecordType.MERGER): >>> pass # do something @@ -274,7 +274,7 @@ def confirm_aws_db_use(env_name: AwsEnvName) -> None: sys.exit() -def create_db( +def get_db( db_url: Optional[str] = None, aws_instance: bool = False, silent: bool = False ) -> AbstractDatabase: """Database factory method. 
Checks environment variables and provided parameters @@ -286,18 +286,18 @@ def create_db( Some examples: - >>> from gene.database import create_db - >>> default_db = create_db() # by default, creates DynamoDB connection on port 8000 + >>> from gene.database import get_db + >>> default_db = get_db() # by default, creates DynamoDB connection on port 8000 >>> >>> postgres_url = "postgresql://postgres@localhost:5432/gene_normalizer" - >>> pg_db = create_db(postgres_url) # creates Postgres connection at port 5432 + >>> pg_db = get_db(postgres_url) # creates Postgres connection at port 5432 >>> >>> import os >>> os.environ["GENE_NORM_DB_URL"] = "http://localhost:8001" - >>> local_db = create_db() # creates DynamoDB connection on port 8001 + >>> local_db = get_db() # creates DynamoDB connection on port 8001 >>> >>> os.environ["GENE_NORM_ENV"] = "Prod" - >>> prod_db = create_db() # creates connection to AWS cloud DynamoDB instance, + >>> prod_db = get_db() # creates connection to AWS cloud DynamoDB instance, >>> # overruling `GENE_NORM_DB_URL` variable setting Precedence is handled for connection settings like so: diff --git a/src/gene/database/dynamodb.py b/src/gene/database/dynamodb.py index 629059c8..eaf94651 100644 --- a/src/gene/database/dynamodb.py +++ b/src/gene/database/dynamodb.py @@ -321,9 +321,9 @@ def get_all_records(self, record_type: RecordType) -> Generator[Dict, None, None For example, - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.schemas import RecordType - >>> db = create_db() + >>> db = get_db() >>> for record in db.get_all_records(RecordType.MERGER): >>> pass # do something diff --git a/src/gene/database/postgresql.py b/src/gene/database/postgresql.py index 66a43132..0bc529e3 100644 --- a/src/gene/database/postgresql.py +++ b/src/gene/database/postgresql.py @@ -465,9 +465,9 @@ def get_all_records(self, record_type: RecordType) -> Generator[Dict, None, None For example, - >>> from gene.database import 
create_db + >>> from gene.database import get_db >>> from gene.schemas import RecordType - >>> db = create_db() + >>> db = get_db() >>> for record in db.get_all_records(RecordType.MERGER): >>> pass # do something diff --git a/src/gene/etl/update.py b/src/gene/etl/update.py index 09a8912c..529d79b2 100644 --- a/src/gene/etl/update.py +++ b/src/gene/etl/update.py @@ -99,9 +99,9 @@ def update_source( For example, to completely refresh HGNC data: >>> from gene.schemas import SourceName - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.etl.update import update_source - >>> db = create_db() + >>> db = get_db() >>> processed_ids = update_source(SourceName.HGNC, db) :param source: name of source to update @@ -203,9 +203,9 @@ def update_all_and_normalize( For example, to completely refresh all Gene Normalizer data: - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.etl.update import update_all_and_normalize - >>> db = create_db() + >>> db = get_db() >>> update_all_and_normalize(db, False) :param db: database instance diff --git a/src/gene/main.py b/src/gene/main.py index 18ffa17f..bb2a3e22 100644 --- a/src/gene/main.py +++ b/src/gene/main.py @@ -5,7 +5,7 @@ from fastapi import FastAPI, HTTPException, Query from gene import __version__ -from gene.database import create_db +from gene.database import get_db from gene.query import QueryHandler from gene.schemas import ( SOURCES, @@ -15,7 +15,7 @@ UnmergedNormalizationService, ) -db = create_db() +db = get_db() query_handler = QueryHandler(db) description = """ diff --git a/src/gene/query.py b/src/gene/query.py index 0a57be43..8e64f970 100644 --- a/src/gene/query.py +++ b/src/gene/query.py @@ -42,15 +42,15 @@ class QueryHandler: def __init__(self, database: AbstractDatabase) -> None: """Initialize QueryHandler instance. Requires a created database object to - initialize. 
The most straightforward way to do this is via the ``create_db`` + initialize. The most straightforward way to do this is via the ``get_db`` method in the ``gene.database`` module: >>> from gene.query import QueryHandler - >>> from gene.database import create_db - >>> q = QueryHandler(create_db()) + >>> from gene.database import get_db + >>> q = QueryHandler(get_db()) - We'll generally call ``create_db`` without any arguments in code examples, for - the sake of brevity. See the `usage` page in the docs and the ``create_db`` API + We'll generally call ``get_db`` without any arguments in code examples, for + the sake of brevity. See the `usage` page in the docs and the ``get_db`` API description for more details. :param database: storage backend to search against @@ -286,8 +286,8 @@ def search( """Return highest match for each source. >>> from gene.query import QueryHandler - >>> from gene.database import create_db - >>> q = QueryHandler(create_db()) + >>> from gene.database import get_db + >>> q = QueryHandler(get_db()) >>> result = q.search("BRAF") >>> result.source_matches[0].records[0].concept_id 'ncbigene:673' @@ -515,8 +515,8 @@ def normalize(self, query: str) -> NormalizeService: Use to retrieve normalized gene concept records: >>> from gene.query import QueryHandler - >>> from gene.database import create_db - >>> q = QueryHandler(create_db()) + >>> from gene.database import get_db + >>> q = QueryHandler(get_db()) >>> result = q.normalize("BRAF") >>> result.normalized_id 'hgnc:1097' @@ -671,9 +671,9 @@ def normalize_unmerged(self, query: str) -> UnmergedNormalizationService: provided query string. 
>>> from gene.query import QueryHandler - >>> from gene.database import create_db + >>> from gene.database import get_db >>> from gene.schemas import SourceName - >>> q = QueryHandler(create_db()) + >>> q = QueryHandler(get_db()) >>> response = q.normalize_unmerged("BRAF") >>> response.match_type diff --git a/tests/conftest.py b/tests/conftest.py index 923d71ac..7aa3b1ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,13 +3,13 @@ import pytest -from gene.database import AbstractDatabase, create_db +from gene.database import AbstractDatabase, get_db @pytest.fixture(scope="session") def database() -> AbstractDatabase: """Create database instance.""" - return create_db() + return get_db() def pytest_addoption(parser): diff --git a/tests/unit/test_emit_warnings.py b/tests/unit/test_emit_warnings.py index c8309aac..1ccd7650 100644 --- a/tests/unit/test_emit_warnings.py +++ b/tests/unit/test_emit_warnings.py @@ -1,5 +1,5 @@ """Test the emit_warnings function.""" -from gene.database import create_db +from gene.database import get_db from gene.query import QueryHandler @@ -10,7 +10,7 @@ def test_emit_warnings(): "non_breaking_space_characters": "Query contains non-breaking space characters" } ] - db = create_db() + db = get_db() query_handler = QueryHandler(db) # Test emit no warnings