Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: configure logging in CLI #342

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/gene/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provides a CLI util to make updates to normalizer database."""
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
Expand All @@ -10,9 +11,20 @@
from gene.etl.update import update_all_sources, update_normalized, update_source
from gene.schemas import SourceName

_logger = logging.getLogger(__name__)

url_description = 'URL endpoint for the application database. Can either be a URL to a local DynamoDB server (e.g. "http://localhost:8001") or a libpq-compliant PostgreSQL connection description (e.g. "postgresql://postgres:password@localhost:5432/gene_normalizer").'


def _configure_logging() -> None:
"""Configure logging."""
logging.basicConfig(
filename=f"{__package__}.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
)
logging.getLogger(__package__).setLevel(logging.DEBUG)


@click.group()
def cli() -> None:
"""Manage Gene Normalizer data."""
Expand Down Expand Up @@ -71,6 +83,7 @@ def update(
:param use_existing: if True, use most recent local data instead of fetching latest version
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if (not sources) and (not all_) and (not normalize):
click.echo(
"Error: must provide SOURCES or at least one of --all, --normalize\n"
Expand Down Expand Up @@ -125,6 +138,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No
:param db_url: URL to normalizer database
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if not click.confirm("Are you sure you want to overwrite existing data?"):
click.get_current_context().exit()
if not data_url:
Expand All @@ -140,6 +154,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No
except DatabaseError as e:
click.echo(f"Encountered exception during update: {e!s}")
click.get_current_context().exit(1)
_logger.info("Successfully loaded data from remote snapshot.")


@cli.command()
Expand Down Expand Up @@ -175,6 +190,7 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None:
:param verbose: if true, print result to console
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
db = create_db(db_url, False, silent)
if not db.check_schema_initialized():
if verbose:
Expand All @@ -186,8 +202,10 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None:
click.echo("Health check failed: DB is incompletely populated.")
click.get_current_context().exit(1)

msg = "DB health check successful: tables appear complete."
if verbose:
click.echo("DB health check successful: tables appear complete.")
click.echo(msg)
_logger.info(msg)


@cli.command()
Expand All @@ -209,6 +227,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None:
:param db_url: URL to normalizer database
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if not output_directory:
output_directory = Path()

Expand All @@ -223,6 +242,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None:
except DatabaseError as e:
click.echo(f"Encountered exception during update: {e!s}")
click.get_current_context().exit(1)
_logger.info("Database dump successful.")


if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
uploaded.
"""
self._extract_data(use_existing)
_logger.info("Transforming and loading %s data to DB...", self._src_name)
_logger.info("Transforming and loading %s data to DB...", self._src_name.value)
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._add_meta()
Expand All @@ -106,6 +106,7 @@ def _extract_data(self, use_existing: bool) -> None:
self._data_file, self._version = self._data_source.get_latest(
from_local=use_existing
)
_logger.info("Acquired data for %s: %s", self._src_name.value, self._data_file)

@abstractmethod
def _transform_data(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion src/gene/etl/ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _extract_data(self, use_existing: bool) -> None:

:param use_existing: if True, use latest available local file
"""
_logger.info("Gathering {self._src_name} data...", self._src_name)
_logger.info("Gathering %s data...", self._src_name.value)
self._gff_src, self._assembly = self._genome_data_handler.get_latest(
from_local=use_existing
)
Expand Down
60 changes: 28 additions & 32 deletions src/gene/etl/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@
_logger = logging.getLogger(__name__)


def _emit_info_msg(msg: str, silent: bool) -> None:
"""Handle info message display.

:param msg: message to log/print
:param silent: if True, don't print to console
"""
if not silent:
click.echo(msg)
_logger.info(msg)


def delete_source(
source: SourceName, db: AbstractDatabase, silent: bool = True
) -> float:
Expand All @@ -25,18 +36,12 @@ def delete_source(
:param silent: if True, suppress console output
:return: time spent deleting source
"""
msg = f"Deleting {source.value}..."
if not silent:
click.echo(f"\n{msg}")
_logger.info(msg)
_emit_info_msg(f"Deleting {source.value}...", silent)
start_delete = timer()
db.delete_source(source)
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted {source.value} in {delete_time:.5f} seconds."
if not silent:
click.echo(f"{msg}")
_logger.info(msg)
_emit_info_msg(f"Deleted {source.value} in {delete_time:.5f} seconds.", silent)
return delete_time


Expand All @@ -54,10 +59,7 @@ def load_source(
:param silent: if True, suppress console output
:return: time spent loading data, and set of processed IDs from that source
"""
msg = f"Loading {source.value}..."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(f"Loading {source.value}...", silent)
start_load = timer()

# used to get source class name from string
Expand All @@ -83,10 +85,10 @@ def load_source(
click.get_current_context().exit()
end_load = timer()
load_time = end_load - start_load
msg = f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds.",
silent,
)

return (load_time, set(processed_ids))

Expand All @@ -112,10 +114,10 @@ def update_source(
"""
delete_time = delete_source(source, db, silent)
load_time, processed_ids = load_source(source, db, use_existing, silent)
msg = f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds.",
silent,
)
return processed_ids


Expand All @@ -142,10 +144,7 @@ def delete_normalized(database: AbstractDatabase, silent: bool = True) -> None:
:param database: DB instance
:param silent: if True, suppress console output
"""
msg = "\nDeleting normalized records..."
_logger.info(msg)
if not silent:
click.echo(msg)
_emit_info_msg("\nDeleting normalized records...", silent)
start_delete = timer()
try:
database.delete_normalized_concepts()
Expand All @@ -154,10 +153,7 @@ def delete_normalized(database: AbstractDatabase, silent: bool = True) -> None:
raise e
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted normalized records in {delete_time:.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(f"Deleted normalized records in {delete_time:.5f} seconds.", silent)


def update_normalized(
Expand Down Expand Up @@ -190,10 +186,10 @@ def update_normalized(
click.echo("Constructing normalized records...")
merge.create_merged_concepts(processed_ids)
end = timer()
msg = f"Merged concept generation completed in " f"{(end - start):.5f} seconds"
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Merged concept generation completed in " f"{(end - start):.5f} seconds",
silent,
)


def update_all_and_normalize(
Expand Down
Loading