diff --git a/src/gene/cli.py b/src/gene/cli.py index a04ff2a..b9f021f 100644 --- a/src/gene/cli.py +++ b/src/gene/cli.py @@ -1,4 +1,5 @@ """Provides a CLI util to make updates to normalizer database.""" +import logging import os from pathlib import Path from typing import Optional, Tuple @@ -10,9 +11,20 @@ from gene.etl.update import update_all_sources, update_normalized, update_source from gene.schemas import SourceName +_logger = logging.getLogger(__name__) + url_description = 'URL endpoint for the application database. Can either be a URL to a local DynamoDB server (e.g. "http://localhost:8001") or a libpq-compliant PostgreSQL connection description (e.g. "postgresql://postgres:password@localhost:5432/gene_normalizer").' +def _configure_logging() -> None: + """Configure logging.""" + logging.basicConfig( + filename=f"{__package__}.log", + format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s", + ) + logging.getLogger(__package__).setLevel(logging.DEBUG) + + @click.group() def cli() -> None: """Manage Gene Normalizer data.""" @@ -71,6 +83,7 @@ def update( :param use_existing: if True, use most recent local data instead of fetching latest version :param silent: if True, suppress console output """ # noqa: D301 + _configure_logging() if (not sources) and (not all_) and (not normalize): click.echo( "Error: must provide SOURCES or at least one of --all, --normalize\n" @@ -125,6 +138,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No :param db_url: URL to normalizer database :param silent: if True, suppress console output """ # noqa: D301 + _configure_logging() if not click.confirm("Are you sure you want to overwrite existing data?"): click.get_current_context().exit() if not data_url: @@ -140,6 +154,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No except DatabaseError as e: click.echo(f"Encountered exception during update: {e!s}") click.get_current_context().exit(1) + _logger.info("Successfully loaded data from remote snapshot.") @cli.command() @@ -175,6 +190,7 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None: :param verbose: if true, print result to console :param silent: if True, suppress console output """ # noqa: D301 + _configure_logging() db = create_db(db_url, False, silent) if not db.check_schema_initialized(): if verbose: @@ -186,8 +202,10 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None: click.echo("Health check failed: DB is incompletely populated.") click.get_current_context().exit(1) + msg = "DB health check successful: tables appear complete." if verbose: - click.echo("DB health check successful: tables appear complete.") + click.echo(msg) + _logger.info(msg) @cli.command() @@ -209,6 +227,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None: :param db_url: URL to normalizer database :param silent: if True, suppress console output """ # noqa: D301 + _configure_logging() if not output_directory: output_directory = Path() @@ -223,6 +242,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None: except DatabaseError as e: click.echo(f"Encountered exception during update: {e!s}") click.get_current_context().exit(1) + _logger.info("Database dump successful.") if __name__ == "__main__": diff --git a/src/gene/etl/base.py b/src/gene/etl/base.py index 44a251b..c13fe67 100644 --- a/src/gene/etl/base.py +++ b/src/gene/etl/base.py @@ -86,7 +86,7 @@ def perform_etl(self, use_existing: bool = False) -> List[str]: uploaded. """ self._extract_data(use_existing) - _logger.info("Transforming and loading %s data to DB...", self._src_name) + _logger.info("Transforming and loading %s data to DB...", self._src_name.value) if not self._silent: click.echo("Transforming and loading data to DB...") self._add_meta() @@ -106,6 +106,7 @@ def _extract_data(self, use_existing: bool) -> None: self._data_file, self._version = self._data_source.get_latest( from_local=use_existing ) + _logger.info("Acquired data for %s: %s", self._src_name.value, self._data_file) @abstractmethod def _transform_data(self) -> None: diff --git a/src/gene/etl/ncbi.py b/src/gene/etl/ncbi.py index 6ed7d84..e751d05 100644 --- a/src/gene/etl/ncbi.py +++ b/src/gene/etl/ncbi.py @@ -51,7 +51,7 @@ def _extract_data(self, use_existing: bool) -> None: :param use_existing: if True, use latest available local file """ - _logger.info("Gathering {self._src_name} data...", self._src_name) + _logger.info("Gathering %s data...", self._src_name.value) self._gff_src, self._assembly = self._genome_data_handler.get_latest( from_local=use_existing ) diff --git a/src/gene/etl/update.py b/src/gene/etl/update.py index 30451ed..44aeb7c 100644 --- a/src/gene/etl/update.py +++ b/src/gene/etl/update.py @@ -15,6 +15,17 @@ _logger = logging.getLogger(__name__) +def _emit_info_msg(msg: str, silent: bool) -> None: + """Handle info message display. + + :param msg: message to log/print + :param silent: if True, don't print to console + """ + if not silent: + click.echo(msg) + _logger.info(msg) + + def delete_source( source: SourceName, db: AbstractDatabase, silent: bool = True ) -> float: @@ -25,18 +36,12 @@ def delete_source( :param silent: if True, suppress console output :return: time spent deleting source """ - msg = f"Deleting {source.value}..." - if not silent: - click.echo(f"\n{msg}") - _logger.info(msg) + _emit_info_msg(f"Deleting {source.value}...", silent) start_delete = timer() db.delete_source(source) end_delete = timer() delete_time = end_delete - start_delete - msg = f"Deleted {source.value} in {delete_time:.5f} seconds." - if not silent: - click.echo(f"{msg}") - _logger.info(msg) + _emit_info_msg(f"Deleted {source.value} in {delete_time:.5f} seconds.", silent) return delete_time @@ -54,10 +59,7 @@ def load_source( :param silent: if True, suppress console output :return: time spent loading data, and set of processed IDs from that source """ - msg = f"Loading {source.value}..." - if not silent: - click.echo(msg) - _logger.info(msg) + _emit_info_msg(f"Loading {source.value}...", silent) start_load = timer() # used to get source class name from string @@ -83,10 +85,10 @@ def load_source( click.get_current_context().exit() end_load = timer() load_time = end_load - start_load - msg = f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds." - if not silent: - click.echo(msg) - _logger.info(msg) + _emit_info_msg( + f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds.", + silent, + ) return (load_time, set(processed_ids)) @@ -112,10 +114,10 @@ def update_source( """ delete_time = delete_source(source, db, silent) load_time, processed_ids = load_source(source, db, use_existing, silent) - msg = f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds." - if not silent: - click.echo(msg) - _logger.info(msg) + _emit_info_msg( + f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds.", + silent, + ) return processed_ids @@ -142,10 +144,7 @@ def delete_normalized(database: AbstractDatabase, silent: bool = True) -> None: :param database: DB instance :param silent: if True, suppress console output """ - msg = "\nDeleting normalized records..." - _logger.info(msg) - if not silent: - click.echo(msg) + _emit_info_msg("\nDeleting normalized records...", silent) start_delete = timer() try: database.delete_normalized_concepts() @@ -154,10 +153,7 @@ def delete_normalized(database: AbstractDatabase, silent: bool = True) -> None: raise e end_delete = timer() delete_time = end_delete - start_delete - msg = f"Deleted normalized records in {delete_time:.5f} seconds." - if not silent: - click.echo(msg) - _logger.info(msg) + _emit_info_msg(f"Deleted normalized records in {delete_time:.5f} seconds.", silent) def update_normalized( @@ -190,10 +186,10 @@ def update_normalized( click.echo("Constructing normalized records...") merge.create_merged_concepts(processed_ids) end = timer() - msg = f"Merged concept generation completed in " f"{(end - start):.5f} seconds" - if not silent: - click.echo(msg) - _logger.info(msg) + _emit_info_msg( + f"Merged concept generation completed in " f"{(end - start):.5f} seconds", + silent, + ) def update_all_and_normalize(