Skip to content

Commit

Permalink
refactor: use DataLicenseAttributes Pydantic model
Browse files: browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jul 12, 2024
1 parent 74a00ce commit b0ed084
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 47 deletions.
24 changes: 15 additions & 9 deletions src/gene/database/postgresql.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,13 @@
DatabaseReadException,
DatabaseWriteException,
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName
from gene.schemas import (
DataLicenseAttributes,
RecordType,
RefType,
SourceMeta,
SourceName,
)

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -293,11 +299,11 @@ def get_source_metadata(self, src_name: SourceName) -> dict:
"version": metadata_result[3],
"data_url": metadata_result[4],
"rdp_url": metadata_result[5],
"data_license_attributes": {
"non_commercial": metadata_result[6],
"attribution": metadata_result[7],
"share_alike": metadata_result[8],
},
"data_license_attributes": DataLicenseAttributes(
non_commercial=metadata_result[6],
attribution=metadata_result[7],
share_alike=metadata_result[8],
),
"genome_assemblies": metadata_result[9],
}
self._cached_sources[src_name] = metadata
Expand Down Expand Up @@ -531,9 +537,9 @@ def add_source_metadata(self, src_name: SourceName, meta: SourceMeta) -> None:
meta.version,
json.dumps(meta.data_url),
meta.rdp_url,
meta.data_license_attributes["non_commercial"],
meta.data_license_attributes["attribution"],
meta.data_license_attributes["share_alike"],
meta.data_license_attributes.non_commercial,
meta.data_license_attributes.attribution,
meta.data_license_attributes.share_alike,
meta.genome_assemblies,
],
)
Expand Down
16 changes: 10 additions & 6 deletions src/gene/etl/ensembl.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,13 @@
from gene.etl.exceptions import (
GeneNormalizerEtlError,
)
from gene.schemas import NamespacePrefix, SourceMeta, SourceName, Strand
from gene.schemas import (
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Strand,
)

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -177,11 +183,9 @@ def _add_meta(self) -> None:
"genome_annotations": f"ftp://ftp.ensembl.org/pub/release-{self._version}/gff3/homo_sapiens/Homo_sapiens.{self._assembly}.{self._version}.gff3.gz"
},
rdp_url=None,
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[self._assembly],
)

Expand Down
9 changes: 4 additions & 5 deletions src/gene/etl/hgnc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,7 @@
from gene.schemas import (
Annotation,
Chromosome,
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Expand Down Expand Up @@ -258,11 +259,9 @@ def _add_meta(self) -> None:
"complete_set_archive": "ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/hgnc_complete_set.json"
},
rdp_url=None,
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[],
)
self._database.add_source_metadata(SourceName.HGNC, metadata)
9 changes: 4 additions & 5 deletions src/gene/etl/ncbi.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@
from gene.schemas import (
Annotation,
Chromosome,
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Expand Down Expand Up @@ -491,11 +492,9 @@ def _add_meta(self) -> None:
"assembly_file": self._assembly_url,
},
rdp_url="https://reusabledata.org/ncbi-gene.html",
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[self._assembly],
)

Expand Down
2 changes: 1 addition & 1 deletion src/gene/schemas.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -227,7 +227,7 @@ class SourceMeta(BaseModel):
version: StrictStr
data_url: dict[StrictStr, StrictStr] # TODO strictness necessary?
rdp_url: StrictStr | None = None
data_license_attributes: dict[StrictStr, StrictBool]
data_license_attributes: DataLicenseAttributes
genome_assemblies: list[StrictStr] = []

model_config = ConfigDict(
Expand Down
10 changes: 4 additions & 6 deletions tests/unit/test_ensembl_source.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName
from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -308,8 +308,6 @@ def test_meta_info(ensembl):
}
assert resp.source_meta_.rdp_url is None
assert resp.source_meta_.genome_assemblies == ["GRCh38"]
assert resp.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert resp.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)
10 changes: 4 additions & 6 deletions tests/unit/test_hgnc_source.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName
from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -824,8 +824,6 @@ def test_meta_info(hgnc):
}
assert resp.source_meta_.rdp_url is None
assert resp.source_meta_.genome_assemblies == []
assert resp.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert resp.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)
22 changes: 13 additions & 9 deletions tests/unit/test_ncbi_source.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,13 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName, SymbolStatus
from gene.schemas import (
DataLicenseAttributes,
Gene,
MatchType,
SourceName,
SymbolStatus,
)


def check_ncbi_discontinued_gene(normalizer_response, concept_id, symbol, match_type):
Expand Down Expand Up @@ -852,9 +858,9 @@ def test_no_match(ncbi, source_urls):
)
assert response.source_meta_.data_url == source_urls
assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html"
assert not response.source_meta_.data_license_attributes["non_commercial"]
assert not response.source_meta_.data_license_attributes["share_alike"]
assert not response.source_meta_.data_license_attributes["attribution"]
assert not response.source_meta_.data_license_attributes.non_commercial
assert not response.source_meta_.data_license_attributes.share_alike
assert not response.source_meta_.data_license_attributes.attribution

# check blank
response = ncbi.search("")
Expand Down Expand Up @@ -901,8 +907,6 @@ def test_meta(ncbi, source_urls):
assert response.source_meta_.data_url == source_urls
assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html"
assert response.source_meta_.genome_assemblies == ["GRCh38.p14"]
assert response.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert response.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)

0 comments on commit b0ed084

Please sign in to comment.