Commit
chore: clean up logging
jsstevenson committed Dec 12, 2023
1 parent fd2ae77 commit 1701fc2
Showing 8 changed files with 67 additions and 73 deletions.
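The change applied across all eight files is the standard library-logging convention: each module takes a private logger named after itself via __name__, and the library stops pinning a level on it. A minimal before/after sketch, condensed from the diffs below:

import logging

# Before: every module shared one hard-coded "gene" logger and forced it
# to DEBUG, overriding whatever the consuming application had configured.
logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)

# After: a private, module-scoped logger (named e.g. "gene.cli") that
# inherits its level and handlers from the application's configuration.
_logger = logging.getLogger(__name__)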
15 changes: 7 additions & 8 deletions src/gene/cli.py
@@ -16,8 +16,7 @@
from gene.database.database import DatabaseException
from gene.schemas import SOURCES, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


@click.command()
@@ -139,14 +138,14 @@ def _delete_source(n: SourceName, db: AbstractDatabase) -> float:
"""
msg = f"Deleting {n.value}..."
click.echo(f"\n{msg}")
-logger.info(msg)
+_logger.info(msg)
start_delete = timer()
db.delete_source(n)
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted {n.value} in {delete_time:.5f} seconds."
click.echo(f"{msg}\n")
-logger.info(msg)
+_logger.info(msg)
return delete_time


@@ -171,7 +170,7 @@ def _load_source(
"""
msg = f"Loading {n.value}..."
click.echo(msg)
-logger.info(msg)
+_logger.info(msg)
start_load = timer()

# used to get source class name from string
@@ -189,17 +188,17 @@
try:
processed_ids += source.perform_etl(use_existing)
except GeneNormalizerEtlError as e:
-logger.error(e)
+_logger.error(e)
click.echo(f"Encountered error while loading {n}: {e}.")
click.get_current_context().exit()
end_load = timer()
load_time = end_load - start_load
msg = f"Loaded {n.value} in {load_time:.5f} seconds."
click.echo(msg)
-logger.info(msg)
+_logger.info(msg)
msg = f"Total time for {n.value}: {(delete_time + load_time):.5f} seconds."
click.echo(msg)
-logger.info(msg)
+_logger.info(msg)


def _delete_normalized_data(database: AbstractDatabase) -> None:
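Since loggers created with __name__ nest under the package namespace, the per-module loggers introduced above remain addressable as a group; a short sketch of the resulting hierarchy (module names follow the file paths in this commit):

import logging

# Each module's logger sits under the "gene" namespace...
cli_logger = logging.getLogger("gene.cli")
etl_logger = logging.getLogger("gene.etl.ensembl")

# ...and records propagate upward, so one call still governs the library.
logging.getLogger("gene").setLevel(logging.WARNING)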
22 changes: 11 additions & 11 deletions src/gene/database/dynamodb.py
@@ -31,7 +31,7 @@
SourceName,
)

-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


class DynamoDbDatabase(AbstractDatabase):
@@ -162,7 +162,7 @@ def check_schema_initialized(self) -> bool:
existing_tables = self.list_tables()
exists = self.gene_table in existing_tables
if not exists:
logger.info(f"{self.gene_table} table is missing or unavailable.")
_logger.info(f"{self.gene_table} table is missing or unavailable.")
return exists

def check_tables_populated(self) -> bool:
@@ -179,7 +179,7 @@ def check_tables_populated(self) -> bool:
KeyConditionExpression=Key("item_type").eq("source"),
).get("Items", [])
if len(sources) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

records = self.genes.query(
@@ -188,7 +188,7 @@
Limit=1,
)
if len(records.get("Items", [])) < 1:
logger.info("Gene records index is empty.")
_logger.info("Gene records index is empty.")
return False

normalized_records = self.genes.query(
@@ -197,7 +197,7 @@
Limit=1,
)
if len(normalized_records.get("Items", [])) < 1:
logger.info("Normalized gene records index is empty.")
_logger.info("Normalized gene records index is empty.")
return False

return True
@@ -259,7 +259,7 @@ def get_record_by_id(
del record["label_and_type"]
return record
except ClientError as e:
-logger.error(
+_logger.error(
f"boto3 client error on get_records_by_id for "
f"search term {concept_id}: "
f"{e.response['Error']['Message']}"
@@ -282,7 +282,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> List[str]:
matches = self.genes.query(KeyConditionExpression=filter_exp)
return [m["concept_id"] for m in matches.get("Items", None)]
except ClientError as e:
-logger.error(
+_logger.error(
f"boto3 client error on get_refs_by_type for "
f"search term {search_term}: "
f"{e.response['Error']['Message']}"
@@ -386,7 +386,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
-logger.error(
+_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
@@ -418,7 +418,7 @@ def add_merged_record(self, record: Dict) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
-logger.error(
+_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
@@ -444,7 +444,7 @@ def _add_ref_record(
try:
self.batch.put_item(Item=record)
except ClientError as e:
-logger.error(
+_logger.error(
f"boto3 client error adding reference {term} for "
f"{concept_id} with match type {ref_type}: "
f"{e.response['Error']['Message']}"
@@ -476,7 +476,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: Any) -> None: # noqa: AN
f"No such record exists for keys {label_and_type}, {concept_id}"
)
else:
-logger.error(
+_logger.error(
f"boto3 client error in `database.update_record()`: "
f"{e.response['Error']['Message']}"
)
22 changes: 11 additions & 11 deletions src/gene/database/postgresql.py
@@ -26,7 +26,7 @@
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName

-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


SCRIPTS_DIR = Path(__file__).parent / "postgresql"
@@ -119,7 +119,7 @@ def drop_db(self) -> None:
with self.conn.cursor() as cur:
cur.execute(self._drop_db_query)
self.conn.commit()
logger.info("Dropped all existing gene normalizer tables.")
_logger.info("Dropped all existing gene normalizer tables.")

def check_schema_initialized(self) -> bool:
"""Check if database schema is properly initialized.
@@ -132,7 +132,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene table existence check failed.")
_logger.info("Gene table existence check failed.")
self.conn.rollback()
return False
try:
@@ -141,7 +141,7 @@
except DuplicateObject:
self.conn.rollback()
else:
logger.info("Gene foreign key existence check failed.")
_logger.info("Gene foreign key existence check failed.")
self.conn.rollback()
return False
try:
@@ -152,7 +152,7 @@
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene normalized view lookup failed.")
_logger.info("Gene normalized view lookup failed.")
self.conn.rollback()
return False
try:
@@ -161,7 +161,7 @@
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene indexes check failed.")
_logger.info("Gene indexes check failed.")
self.conn.rollback()
return False

@@ -184,21 +184,21 @@ def check_tables_populated(self) -> bool:
cur.execute(self._check_sources_query)
results = cur.fetchall()
if len(results) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_concepts_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Gene records table is empty.")
_logger.info("Gene records table is empty.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_merged_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Normalized gene records table is empty.")
_logger.info("Normalized gene records table is empty.")
return False

return True
@@ -260,7 +260,7 @@ def _drop_indexes(self) -> None:

def _create_tables(self) -> None:
"""Create all tables, indexes, and views."""
logger.debug("Creating new gene normalizer tables.")
_logger.debug("Creating new gene normalizer tables.")
tables_query = (SCRIPTS_DIR / "create_tables.sql").read_bytes()

with self.conn.cursor() as cur:
@@ -594,7 +594,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
cur.execute(self._ins_symbol_query, [record["symbol"], concept_id])
self.conn.commit()
except UniqueViolation:
logger.error(f"Record with ID {concept_id} already exists")
_logger.error(f"Record with ID {concept_id} already exists")
self.conn.rollback()

_add_merged_record_query = b"""
9 changes: 4 additions & 5 deletions src/gene/etl/base.py
@@ -18,8 +18,7 @@
from gene.database import AbstractDatabase
from gene.schemas import ITEM_TYPES, Gene, GeneSequenceLocation, MatchType, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


APP_ROOT = Path(__file__).resolve().parent
@@ -128,7 +127,7 @@ def _load_gene(self, gene: Dict) -> None:
try:
assert Gene(match_type=MatchType.NO_MATCH, **gene)
except pydantic.ValidationError as e:
logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
_logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
else:
concept_id = gene["concept_id"]
gene["label_and_type"] = f"{concept_id.lower()}##identity"
@@ -272,7 +271,7 @@ def _get_seq_id_aliases(self, seq_id: str) -> List[str]:
try:
aliases = self.seqrepo.translate_alias(seq_id, target_namespaces="ga4gh")
except KeyError as e:
logger.warning(f"SeqRepo raised KeyError: {e}")
_logger.warning(f"SeqRepo raised KeyError: {e}")
return aliases

def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Dict:
@@ -299,7 +298,7 @@ def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Dict:
sequence_id=sequence,
).model_dump() # type: ignore
else:
-logger.warning(
+_logger.warning(
f"{params['concept_id']} has invalid interval:"
f"start={gene.start - 1} end={gene.end}"
) # type: ignore
11 changes: 5 additions & 6 deletions src/gene/etl/ensembl.py
@@ -19,8 +19,7 @@
)
from gene.schemas import NamespacePrefix, SourceMeta, SourceName, Strand

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


class Ensembl(Base):
@@ -84,7 +83,7 @@ def _download_data(self) -> Path:
:return: path to acquired file
:raise GeneSourceFetchError: if unable to find file matching expected pattern
"""
logger.info("Downloading latest Ensembl data file...")
_logger.info("Downloading latest Ensembl data file...")
pattern = r"Homo_sapiens\.(?P<assembly>GRCh\d+)\.(?P<version>\d+)\.gff3\.gz"
with FTP(self._host) as ftp:
ftp.login()
@@ -98,7 +97,7 @@
version = resp["version"]
new_fn = f"ensembl_{assembly}_{version}.gff3"
self._ftp_download_file(ftp, f, self.src_data_dir, new_fn)
-logger.info(
+_logger.info(
f"Successfully downloaded Ensembl {version} data to {self.src_data_dir / new_fn}."
)
return self.src_data_dir / new_fn
@@ -120,7 +119,7 @@

def _transform_data(self) -> None:
"""Transform the Ensembl source."""
logger.info("Transforming Ensembl...")
_logger.info("Transforming Ensembl...")
db = gffutils.create_db(
str(self._data_src),
dbfn=":memory:",
@@ -143,7 +142,7 @@ def _transform_data(self) -> None:
gene = self._add_gene(f, accession_numbers)
if gene:
self._load_gene(gene)
logger.info("Successfully transformed Ensembl.")
_logger.info("Successfully transformed Ensembl.")

def _add_gene(self, f: Feature, accession_numbers: Dict) -> Dict:
"""Create a transformed gene record.
13 changes: 6 additions & 7 deletions src/gene/etl/hgnc.py
@@ -27,8 +27,7 @@
SymbolStatus,
)

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


class HGNC(Base):
@@ -87,15 +86,15 @@ def _download_data(self) -> Path:
:return: path to newly-downloaded file
"""
logger.info("Downloading HGNC data file...")
_logger.info("Downloading HGNC data file...")

tmp_fn = "hgnc_version.json"
version = self._ftp_download(
self._host, self._data_dir, tmp_fn, self.src_data_dir, self._fn
)
final_location = f"{self.src_data_dir}/hgnc_{version}.json"
shutil.move(f"{self.src_data_dir}/{tmp_fn}", final_location)
logger.info(f"Successfully downloaded HGNC data file to {final_location}.")
_logger.info(f"Successfully downloaded HGNC data file to {final_location}.")
return Path(final_location)

def _extract_data(self, use_existing: bool) -> None:
@@ -111,7 +110,7 @@

def _transform_data(self) -> None:
"""Transform the HGNC source."""
logger.info("Transforming HGNC...")
_logger.info("Transforming HGNC...")
with open(self._data_file, "r") as f:
data = json.load(f)

@@ -142,7 +141,7 @@ def _transform_data(self) -> None:
if "locus_type" in r:
gene["gene_type"] = r["locus_type"]
self._load_gene(gene)
logger.info("Successfully transformed HGNC.")
_logger.info("Successfully transformed HGNC.")

def _get_aliases(self, r: Dict, gene: Dict) -> None:
"""Store aliases in a gene record.
@@ -223,7 +222,7 @@ def _get_xrefs_associated_with(self, r: Dict, gene: Dict) -> None:
else:
self._get_xref_associated_with(key, src, r, associated_with)
else:
logger.warning(f"{key} not in schemas.py")
_logger.warning(f"{key} not in schemas.py")

if xrefs:
gene["xrefs"] = xrefs
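With the setLevel(logging.DEBUG) calls gone from the library modules, verbosity becomes the consumer's decision. A sketch of what a consuming application might do; the exact configuration is illustrative, not part of this commit:

import logging

# Application-side setup: pick a global level, then opt into extra detail
# from the gene-normalizer loggers only when it is wanted.
logging.basicConfig(level=logging.INFO)
logging.getLogger("gene").setLevel(logging.DEBUG)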
