From b0ed08440066773ab8d24f3ba46bb9216e431b4d Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 12 Jul 2024 10:55:06 -0400 Subject: [PATCH] refactor: use DataLicenseAttributes Pydantic model --- src/gene/database/postgresql.py | 24 +++++++++++++++--------- src/gene/etl/ensembl.py | 16 ++++++++++------ src/gene/etl/hgnc.py | 9 ++++----- src/gene/etl/ncbi.py | 9 ++++----- src/gene/schemas.py | 2 +- tests/unit/test_ensembl_source.py | 10 ++++------ tests/unit/test_hgnc_source.py | 10 ++++------ tests/unit/test_ncbi_source.py | 22 +++++++++++++--------- 8 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/gene/database/postgresql.py b/src/gene/database/postgresql.py index 59d4388..8f0bd56 100644 --- a/src/gene/database/postgresql.py +++ b/src/gene/database/postgresql.py @@ -26,7 +26,13 @@ DatabaseReadException, DatabaseWriteException, ) -from gene.schemas import RecordType, RefType, SourceMeta, SourceName +from gene.schemas import ( + DataLicenseAttributes, + RecordType, + RefType, + SourceMeta, + SourceName, +) _logger = logging.getLogger(__name__) @@ -293,11 +299,11 @@ def get_source_metadata(self, src_name: SourceName) -> dict: "version": metadata_result[3], "data_url": metadata_result[4], "rdp_url": metadata_result[5], - "data_license_attributes": { - "non_commercial": metadata_result[6], - "attribution": metadata_result[7], - "share_alike": metadata_result[8], - }, + "data_license_attributes": DataLicenseAttributes( + non_commercial=metadata_result[6], + attribution=metadata_result[7], + share_alike=metadata_result[8], + ), "genome_assemblies": metadata_result[9], } self._cached_sources[src_name] = metadata @@ -531,9 +537,9 @@ def add_source_metadata(self, src_name: SourceName, meta: SourceMeta) -> None: meta.version, json.dumps(meta.data_url), meta.rdp_url, - meta.data_license_attributes["non_commercial"], - meta.data_license_attributes["attribution"], - meta.data_license_attributes["share_alike"], + meta.data_license_attributes.non_commercial, + meta.data_license_attributes.attribution, + meta.data_license_attributes.share_alike, meta.genome_assemblies, ], ) diff --git a/src/gene/etl/ensembl.py b/src/gene/etl/ensembl.py index 89953d9..e56f70e 100644 --- a/src/gene/etl/ensembl.py +++ b/src/gene/etl/ensembl.py @@ -10,7 +10,13 @@ from gene.etl.exceptions import ( GeneNormalizerEtlError, ) -from gene.schemas import NamespacePrefix, SourceMeta, SourceName, Strand +from gene.schemas import ( + DataLicenseAttributes, + NamespacePrefix, + SourceMeta, + SourceName, + Strand, +) _logger = logging.getLogger(__name__) @@ -177,11 +183,9 @@ def _add_meta(self) -> None: "genome_annotations": f"ftp://ftp.ensembl.org/pub/release-{self._version}/gff3/homo_sapiens/Homo_sapiens.{self._assembly}.{self._version}.gff3.gz" }, rdp_url=None, - data_license_attributes={ - "non_commercial": False, - "share_alike": False, - "attribution": False, - }, + data_license_attributes=DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + ), genome_assemblies=[self._assembly], ) diff --git a/src/gene/etl/hgnc.py b/src/gene/etl/hgnc.py index fbb7f8a..151724b 100644 --- a/src/gene/etl/hgnc.py +++ b/src/gene/etl/hgnc.py @@ -12,6 +12,7 @@ from gene.schemas import ( Annotation, Chromosome, + DataLicenseAttributes, NamespacePrefix, SourceMeta, SourceName, @@ -258,11 +259,9 @@ def _add_meta(self) -> None: "complete_set_archive": "ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/hgnc_complete_set.json" }, rdp_url=None, - data_license_attributes={ - "non_commercial": False, - "share_alike": False, - "attribution": False, - }, + data_license_attributes=DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + ), genome_assemblies=[], ) self._database.add_source_metadata(SourceName.HGNC, metadata) diff --git a/src/gene/etl/ncbi.py b/src/gene/etl/ncbi.py index 5826e82..3e62423 100644 --- a/src/gene/etl/ncbi.py +++ b/src/gene/etl/ncbi.py @@ -18,6 +18,7 @@ from gene.schemas import ( Annotation, Chromosome, + DataLicenseAttributes, NamespacePrefix, SourceMeta, SourceName, @@ -491,11 +492,9 @@ def _add_meta(self) -> None: "assembly_file": self._assembly_url, }, rdp_url="https://reusabledata.org/ncbi-gene.html", - data_license_attributes={ - "non_commercial": False, - "share_alike": False, - "attribution": False, - }, + data_license_attributes=DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + ), genome_assemblies=[self._assembly], ) diff --git a/src/gene/schemas.py b/src/gene/schemas.py index 6e96e32..66fdf42 100644 --- a/src/gene/schemas.py +++ b/src/gene/schemas.py @@ -227,7 +227,7 @@ class SourceMeta(BaseModel): version: StrictStr data_url: dict[StrictStr, StrictStr] # TODO strictness necessary? rdp_url: StrictStr | None = None - data_license_attributes: dict[StrictStr, StrictBool] + data_license_attributes: DataLicenseAttributes genome_assemblies: list[StrictStr] = [] model_config = ConfigDict( diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py index acc2c71..0bc9cd4 100644 --- a/tests/unit/test_ensembl_source.py +++ b/tests/unit/test_ensembl_source.py @@ -3,7 +3,7 @@ import pytest from gene.query import QueryHandler -from gene.schemas import Gene, MatchType, SourceName +from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName @pytest.fixture(scope="module") @@ -308,8 +308,6 @@ def test_meta_info(ensembl): } assert resp.source_meta_.rdp_url is None assert resp.source_meta_.genome_assemblies == ["GRCh38"] - assert resp.source_meta_.data_license_attributes == { - "non_commercial": False, - "share_alike": False, - "attribution": False, - } + assert resp.source_meta_.data_license_attributes == DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + ) diff --git a/tests/unit/test_hgnc_source.py b/tests/unit/test_hgnc_source.py index a771efa..15bac93 100644 --- a/tests/unit/test_hgnc_source.py +++ b/tests/unit/test_hgnc_source.py @@ -5,7 +5,7 @@ import pytest from gene.query import QueryHandler -from gene.schemas import Gene, MatchType, SourceName +from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName @pytest.fixture(scope="module") @@ -824,8 +824,6 @@ def test_meta_info(hgnc): } assert resp.source_meta_.rdp_url is None assert resp.source_meta_.genome_assemblies == [] - assert resp.source_meta_.data_license_attributes == { - "non_commercial": False, - "share_alike": False, - "attribution": False, - } + assert resp.source_meta_.data_license_attributes == DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + ) diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py index ab64713..5d2825e 100644 --- a/tests/unit/test_ncbi_source.py +++ b/tests/unit/test_ncbi_source.py @@ -5,7 +5,13 @@ import pytest from gene.query import QueryHandler -from gene.schemas import Gene, MatchType, SourceName, SymbolStatus +from gene.schemas import ( + DataLicenseAttributes, + Gene, + MatchType, + SourceName, + SymbolStatus, +) def check_ncbi_discontinued_gene(normalizer_response, concept_id, symbol, match_type): @@ -852,9 +858,9 @@ def test_no_match(ncbi, source_urls): ) assert response.source_meta_.data_url == source_urls assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html" - assert not response.source_meta_.data_license_attributes["non_commercial"] - assert not response.source_meta_.data_license_attributes["share_alike"] - assert not response.source_meta_.data_license_attributes["attribution"] + assert not response.source_meta_.data_license_attributes.non_commercial + assert not response.source_meta_.data_license_attributes.share_alike + assert not response.source_meta_.data_license_attributes.attribution # check blank response = ncbi.search("") @@ -901,8 +907,6 @@ def test_meta(ncbi, source_urls): assert response.source_meta_.data_url == source_urls assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html" assert response.source_meta_.genome_assemblies == ["GRCh38.p14"] - assert response.source_meta_.data_license_attributes == { - "non_commercial": False, - "share_alike": False, - "attribution": False, - } + assert response.source_meta_.data_license_attributes == DataLicenseAttributes( + non_commercial=False, share_alike=False, attribution=False + )