Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use DataLicenseAttributes Pydantic model #361

Merged
1 commit merged on Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions src/gene/database/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@
DatabaseReadException,
DatabaseWriteException,
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName
from gene.schemas import (
DataLicenseAttributes,
RecordType,
RefType,
SourceMeta,
SourceName,
)

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -293,11 +299,11 @@ def get_source_metadata(self, src_name: SourceName) -> dict:
"version": metadata_result[3],
"data_url": metadata_result[4],
"rdp_url": metadata_result[5],
"data_license_attributes": {
"non_commercial": metadata_result[6],
"attribution": metadata_result[7],
"share_alike": metadata_result[8],
},
"data_license_attributes": DataLicenseAttributes(
non_commercial=metadata_result[6],
attribution=metadata_result[7],
share_alike=metadata_result[8],
),
"genome_assemblies": metadata_result[9],
}
self._cached_sources[src_name] = metadata
Expand Down Expand Up @@ -531,9 +537,9 @@ def add_source_metadata(self, src_name: SourceName, meta: SourceMeta) -> None:
meta.version,
json.dumps(meta.data_url),
meta.rdp_url,
meta.data_license_attributes["non_commercial"],
meta.data_license_attributes["attribution"],
meta.data_license_attributes["share_alike"],
meta.data_license_attributes.non_commercial,
meta.data_license_attributes.attribution,
meta.data_license_attributes.share_alike,
meta.genome_assemblies,
],
)
Expand Down
16 changes: 10 additions & 6 deletions src/gene/etl/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from gene.etl.exceptions import (
GeneNormalizerEtlError,
)
from gene.schemas import NamespacePrefix, SourceMeta, SourceName, Strand
from gene.schemas import (
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Strand,
)

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -177,11 +183,9 @@ def _add_meta(self) -> None:
"genome_annotations": f"ftp://ftp.ensembl.org/pub/release-{self._version}/gff3/homo_sapiens/Homo_sapiens.{self._assembly}.{self._version}.gff3.gz"
},
rdp_url=None,
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[self._assembly],
)

Expand Down
9 changes: 4 additions & 5 deletions src/gene/etl/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from gene.schemas import (
Annotation,
Chromosome,
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Expand Down Expand Up @@ -258,11 +259,9 @@ def _add_meta(self) -> None:
"complete_set_archive": "ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/hgnc_complete_set.json"
},
rdp_url=None,
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[],
)
self._database.add_source_metadata(SourceName.HGNC, metadata)
9 changes: 4 additions & 5 deletions src/gene/etl/ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from gene.schemas import (
Annotation,
Chromosome,
DataLicenseAttributes,
NamespacePrefix,
SourceMeta,
SourceName,
Expand Down Expand Up @@ -491,11 +492,9 @@ def _add_meta(self) -> None:
"assembly_file": self._assembly_url,
},
rdp_url="https://reusabledata.org/ncbi-gene.html",
data_license_attributes={
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
data_license_attributes=DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
),
genome_assemblies=[self._assembly],
)

Expand Down
2 changes: 1 addition & 1 deletion src/gene/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ class SourceMeta(BaseModel):
version: StrictStr
data_url: dict[StrictStr, StrictStr] # TODO strictness necessary?
rdp_url: StrictStr | None = None
data_license_attributes: dict[StrictStr, StrictBool]
data_license_attributes: DataLicenseAttributes
genome_assemblies: list[StrictStr] = []

model_config = ConfigDict(
Expand Down
10 changes: 4 additions & 6 deletions tests/unit/test_ensembl_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName
from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -308,8 +308,6 @@ def test_meta_info(ensembl):
}
assert resp.source_meta_.rdp_url is None
assert resp.source_meta_.genome_assemblies == ["GRCh38"]
assert resp.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert resp.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)
10 changes: 4 additions & 6 deletions tests/unit/test_hgnc_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName
from gene.schemas import DataLicenseAttributes, Gene, MatchType, SourceName


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -824,8 +824,6 @@ def test_meta_info(hgnc):
}
assert resp.source_meta_.rdp_url is None
assert resp.source_meta_.genome_assemblies == []
assert resp.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert resp.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)
22 changes: 13 additions & 9 deletions tests/unit/test_ncbi_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
import pytest

from gene.query import QueryHandler
from gene.schemas import Gene, MatchType, SourceName, SymbolStatus
from gene.schemas import (
DataLicenseAttributes,
Gene,
MatchType,
SourceName,
SymbolStatus,
)


def check_ncbi_discontinued_gene(normalizer_response, concept_id, symbol, match_type):
Expand Down Expand Up @@ -852,9 +858,9 @@ def test_no_match(ncbi, source_urls):
)
assert response.source_meta_.data_url == source_urls
assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html"
assert not response.source_meta_.data_license_attributes["non_commercial"]
assert not response.source_meta_.data_license_attributes["share_alike"]
assert not response.source_meta_.data_license_attributes["attribution"]
assert not response.source_meta_.data_license_attributes.non_commercial
assert not response.source_meta_.data_license_attributes.share_alike
assert not response.source_meta_.data_license_attributes.attribution

# check blank
response = ncbi.search("")
Expand Down Expand Up @@ -901,8 +907,6 @@ def test_meta(ncbi, source_urls):
assert response.source_meta_.data_url == source_urls
assert response.source_meta_.rdp_url == "https://reusabledata.org/ncbi-gene.html"
assert response.source_meta_.genome_assemblies == ["GRCh38.p14"]
assert response.source_meta_.data_license_attributes == {
"non_commercial": False,
"share_alike": False,
"attribution": False,
}
assert response.source_meta_.data_license_attributes == DataLicenseAttributes(
non_commercial=False, share_alike=False, attribution=False
)
Loading