Skip to content

Commit

Permalink
fix: correctly configure logging (#349)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jul 1, 2024
1 parent 50012e7 commit 4c18043
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 64 deletions.
34 changes: 25 additions & 9 deletions src/gene/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,16 @@
from gene.database.database import DatabaseException
from gene.schemas import SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


def _configure_logging() -> None:
"""Configure logging."""
logging.basicConfig(
filename=f"{__package__}.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
)
logging.getLogger(__package__).setLevel(logging.DEBUG)


@click.command()
Expand All @@ -32,6 +40,7 @@ def check_db(db_url: str, verbose: bool = False) -> None:
:param db_url: URL to normalizer database
:param verbose: if true, print result to console
""" # noqa: D301
_configure_logging()
db = create_db(db_url, False)
if not db.check_schema_initialized():
if verbose:
Expand All @@ -43,8 +52,10 @@ def check_db(db_url: str, verbose: bool = False) -> None:
click.echo("Health check failed: DB is incompletely populated.")
click.get_current_context().exit(1)

msg = "DB health check successful: tables appear complete."
if verbose:
click.echo("DB health check successful: tables appear complete.")
click.echo(msg)
_logger.info(msg)


@click.command()
Expand All @@ -59,6 +70,7 @@ def update_from_remote(data_url: Optional[str], db_url: str) -> None:
:param data_url: user-specified location to pull DB dump from
:param db_url: URL to normalizer database
""" # noqa: D301
_configure_logging()
if not click.confirm("Are you sure you want to overwrite existing data?"):
click.get_current_context().exit()
if not data_url:
Expand All @@ -74,6 +86,7 @@ def update_from_remote(data_url: Optional[str], db_url: str) -> None:
except DatabaseException as e:
click.echo(f"Encountered exception during update: {str(e)}")
click.get_current_context().exit(1)
_logger.info("Successfully loaded data from remote snapshot.")


@click.command()
Expand All @@ -91,6 +104,7 @@ def dump_database(output_directory: Path, db_url: str) -> None:
:param output_directory: path to existing directory
:param db_url: URL to normalizer database
""" # noqa: D301
_configure_logging()
if not output_directory:
output_directory = Path(".")

Expand All @@ -105,6 +119,7 @@ def dump_database(output_directory: Path, db_url: str) -> None:
except DatabaseException as e:
click.echo(f"Encountered exception during update: {str(e)}")
click.get_current_context().exit(1)
_logger.info("Database dump successful.")


def _update_normalizer(
Expand Down Expand Up @@ -140,14 +155,14 @@ def _delete_source(n: SourceName, db: AbstractDatabase) -> float:
"""
msg = f"Deleting {n.value}..."
click.echo(f"\n{msg}")
logger.info(msg)
_logger.info(msg)
start_delete = timer()
db.delete_source(n)
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted {n.value} in {delete_time:.5f} seconds."
click.echo(f"{msg}\n")
logger.info(msg)
_logger.info(msg)
return delete_time


Expand All @@ -172,7 +187,7 @@ def _load_source(
"""
msg = f"Loading {n.value}..."
click.echo(msg)
logger.info(msg)
_logger.info(msg)
start_load = timer()

# used to get source class name from string
Expand All @@ -190,17 +205,17 @@ def _load_source(
try:
processed_ids += source.perform_etl(use_existing)
except GeneNormalizerEtlError as e:
logger.error(e)
_logger.error(e)
click.echo(f"Encountered error while loading {n}: {e}.")
click.get_current_context().exit()
end_load = timer()
load_time = end_load - start_load
msg = f"Loaded {n.value} in {load_time:.5f} seconds."
click.echo(msg)
logger.info(msg)
_logger.info(msg)
msg = f"Total time for {n.value}: {(delete_time + load_time):.5f} seconds."
click.echo(msg)
logger.info(msg)
_logger.info(msg)


def _delete_normalized_data(database: AbstractDatabase) -> None:
Expand Down Expand Up @@ -286,6 +301,7 @@ def update_normalizer_db(
:param update_merged: if true, update normalized records
:param use_existing: if True, use most recent local data instead of fetching latest version
""" # noqa: D301
_configure_logging()
db = create_db(db_url, aws_instance)

if update_all:
Expand Down
22 changes: 11 additions & 11 deletions src/gene/database/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName

logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


class DynamoDbDatabase(AbstractDatabase):
Expand Down Expand Up @@ -156,7 +156,7 @@ def check_schema_initialized(self) -> bool:
existing_tables = self.list_tables()
exists = self.gene_table in existing_tables
if not exists:
logger.info(f"{self.gene_table} table is missing or unavailable.")
_logger.info(f"{self.gene_table} table is missing or unavailable.")
return exists

def check_tables_populated(self) -> bool:
Expand All @@ -173,7 +173,7 @@ def check_tables_populated(self) -> bool:
KeyConditionExpression=Key("item_type").eq("source"),
).get("Items", [])
if len(sources) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

records = self.genes.query(
Expand All @@ -182,7 +182,7 @@ def check_tables_populated(self) -> bool:
Limit=1,
)
if len(records.get("Items", [])) < 1:
logger.info("Gene records index is empty.")
_logger.info("Gene records index is empty.")
return False

normalized_records = self.genes.query(
Expand All @@ -191,7 +191,7 @@ def check_tables_populated(self) -> bool:
Limit=1,
)
if len(normalized_records.get("Items", [])) < 1:
logger.info("Normalized gene records index is empty.")
_logger.info("Normalized gene records index is empty.")
return False

return True
Expand Down Expand Up @@ -253,7 +253,7 @@ def get_record_by_id(
del record["label_and_type"]
return record
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error on get_records_by_id for "
f"search term {concept_id}: "
f"{e.response['Error']['Message']}"
Expand All @@ -276,7 +276,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> List[str]:
matches = self.genes.query(KeyConditionExpression=filter_exp)
return [m["concept_id"] for m in matches.get("Items", None)]
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error on get_refs_by_type for "
f"search term {search_term}: "
f"{e.response['Error']['Message']}"
Expand Down Expand Up @@ -380,7 +380,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
Expand Down Expand Up @@ -412,7 +412,7 @@ def add_merged_record(self, record: Dict) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
Expand All @@ -438,7 +438,7 @@ def _add_ref_record(
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error adding reference {term} for "
f"{concept_id} with match type {ref_type}: "
f"{e.response['Error']['Message']}"
Expand Down Expand Up @@ -470,7 +470,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: Any) -> None: # noqa: AN
f"No such record exists for keys {label_and_type}, {concept_id}"
)
else:
logger.error(
_logger.error(
f"boto3 client error in `database.update_record()`: "
f"{e.response['Error']['Message']}"
)
Expand Down
22 changes: 11 additions & 11 deletions src/gene/database/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName

logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


SCRIPTS_DIR = Path(__file__).parent / "postgresql"
Expand Down Expand Up @@ -119,7 +119,7 @@ def drop_db(self) -> None:
with self.conn.cursor() as cur:
cur.execute(self._drop_db_query)
self.conn.commit()
logger.info("Dropped all existing gene normalizer tables.")
_logger.info("Dropped all existing gene normalizer tables.")

def check_schema_initialized(self) -> bool:
"""Check if database schema is properly initialized.
Expand All @@ -132,7 +132,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene table existence check failed.")
_logger.info("Gene table existence check failed.")
self.conn.rollback()
return False
try:
Expand All @@ -141,7 +141,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateObject:
self.conn.rollback()
else:
logger.info("Gene foreign key existence check failed.")
_logger.info("Gene foreign key existence check failed.")
self.conn.rollback()
return False
try:
Expand All @@ -152,7 +152,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene normalized view lookup failed.")
_logger.info("Gene normalized view lookup failed.")
self.conn.rollback()
return False
try:
Expand All @@ -161,7 +161,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene indexes check failed.")
_logger.info("Gene indexes check failed.")
self.conn.rollback()
return False

Expand All @@ -184,21 +184,21 @@ def check_tables_populated(self) -> bool:
cur.execute(self._check_sources_query)
results = cur.fetchall()
if len(results) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_concepts_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Gene records table is empty.")
_logger.info("Gene records table is empty.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_merged_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Normalized gene records table is empty.")
_logger.info("Normalized gene records table is empty.")
return False

return True
Expand Down Expand Up @@ -260,7 +260,7 @@ def _drop_indexes(self) -> None:

def _create_tables(self) -> None:
"""Create all tables, indexes, and views."""
logger.debug("Creating new gene normalizer tables.")
_logger.debug("Creating new gene normalizer tables.")
tables_query = (SCRIPTS_DIR / "create_tables.sql").read_bytes()

with self.conn.cursor() as cur:
Expand Down Expand Up @@ -594,7 +594,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
cur.execute(self._ins_symbol_query, [record["symbol"], concept_id])
self.conn.commit()
except UniqueViolation:
logger.error(f"Record with ID {concept_id} already exists")
_logger.error(f"Record with ID {concept_id} already exists")
self.conn.rollback()

_add_merged_record_query = b"""
Expand Down
11 changes: 6 additions & 5 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from gene.database import AbstractDatabase
from gene.schemas import Gene, GeneSequenceLocation, MatchType, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


DATA_DISPATCH = {
Expand Down Expand Up @@ -70,6 +69,7 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
uploaded.
"""
self._extract_data(use_existing)
_logger.info("Transforming and loading %s data to DB...", self._src_name.value)
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._add_meta()
Expand All @@ -88,6 +88,7 @@ def _extract_data(self, use_existing: bool) -> None:
self._data_file, self._version = self._data_source.get_latest(
from_local=use_existing
)
_logger.info("Acquired data for %s: %s", self._src_name.value, self._data_file)

@abstractmethod
def _transform_data(self) -> None:
Expand All @@ -110,7 +111,7 @@ def _load_gene(self, gene: Dict) -> None:
try:
assert Gene(match_type=MatchType.NO_MATCH, **gene)
except pydantic.ValidationError as e:
logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
_logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
else:
concept_id = gene["concept_id"]
gene["label_and_type"] = f"{concept_id.lower()}##identity"
Expand Down Expand Up @@ -211,7 +212,7 @@ def _get_seq_id_aliases(self, seq_id: str) -> List[str]:
try:
aliases = self.seqrepo.translate_alias(seq_id, target_namespaces="ga4gh")
except KeyError as e:
logger.warning(f"SeqRepo raised KeyError: {e}")
_logger.warning(f"SeqRepo raised KeyError: {e}")
return aliases

def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Dict:
Expand All @@ -238,7 +239,7 @@ def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Di
sequence_id=sequence,
).model_dump() # type: ignore
else:
logger.warning(
_logger.warning(
f"{params['concept_id']} has invalid interval:"
f"start={gene.start - 1} end={gene.end}"
) # type: ignore
Expand Down
Loading

0 comments on commit 4c18043

Please sign in to comment.