Skip to content

Commit

Permalink
feat!: update models to vrs 2.0.0 community review ballot (#378)
Browse files Browse the repository at this point in the history
close #377 

* Update modules to vrs [2.0.0-ballot.2024-11.3](https://github.com/ga4gh/vrs/tree/2.0.0-ballot.2024-11.3) tag
  * GKS `Gene` is now a `MappableConcept`
  * `alternativeLabels` moved to `extensions` with `name='aliases'`
  • Loading branch information
korikuzma authored Dec 23, 2024
1 parent d80408c commit 1ee3408
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 130 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dependencies = [
"uvicorn",
"click",
"boto3",
"ga4gh.vrs~=2.0.0a10",
"ga4gh.vrs==2.0.0a13",
]
dynamic = ["version"]

Expand All @@ -41,7 +41,7 @@ etl = [
"wags-tails~=0.2.1",
"setuptools", # pinned for 3.12 because yoyo-migrations still uses pkg_resources
]
test = ["pytest>=6.0", "pytest-cov", "mock", "httpx"]
test = ["pytest>=6.0", "pytest-cov", "mock", "httpx", "deepdiff"]
dev = ["pre-commit>=3.7.1", "ruff==0.5.0"]
docs = [
"sphinx==6.1.3",
Expand Down
51 changes: 27 additions & 24 deletions src/gene/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@
from collections.abc import Callable
from typing import Any, TypeVar

from ga4gh.core import domain_models, entity_models, ga4gh_identify
from ga4gh.vrs import models
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import (
Coding,
ConceptMapping,
Extension,
MappableConcept,
Relation,
code,
)
from ga4gh.vrs.models import SequenceLocation, SequenceReference

from gene import ITEM_TYPES, NAMESPACE_LOOKUP, PREFIX_LOOKUP, __version__
from gene.database import AbstractDatabase, DatabaseReadException
Expand Down Expand Up @@ -81,16 +89,16 @@ def _emit_warnings(query_str: str) -> list:
return warnings

@staticmethod
def _transform_sequence_location(loc: dict) -> models.SequenceLocation:
def _transform_sequence_location(loc: dict) -> SequenceLocation:
"""Transform a sequence location to VRS sequence location
:param loc: GeneSequenceLocation represented as a dict
:return: VRS sequence location
"""
refget_ac = loc["sequence_id"].split("ga4gh:")[-1]

return models.SequenceLocation(
sequenceReference=models.SequenceReference(refgetAccession=refget_ac),
return SequenceLocation(
sequenceReference=SequenceReference(refgetAccession=refget_ac),
start=int(loc["start"]),
end=int(loc["end"]),
)
Expand Down Expand Up @@ -390,27 +398,29 @@ def _add_gene(
:param possible_concepts: List of other normalized concepts found
:return: Response with core Gene
"""
gene_obj = domain_models.Gene(
gene_obj = MappableConcept(
id=f"normalize.gene.{record['concept_id']}",
label=record["symbol"],
conceptType="Gene",
)

# mappings
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, code = source_id.split(":")
system, system_code = source_id.split(":")
mappings.append(
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
ConceptMapping(
coding=Coding(code=code(system_code), system=system.lower()),
relation=Relation.RELATED_MATCH,
)
)
if mappings:
gene_obj.mappings = mappings

# extensions
extensions = []

# aliases
aliases = set()
for key in ["previous_symbols", "aliases"]:
Expand All @@ -420,10 +430,8 @@ def _add_gene(
val = [val]
aliases.update(val)
if aliases:
gene_obj.alternativeLabels = list(aliases)
extensions.append(Extension(name="aliases", value=list(aliases)))

# extensions
extensions = []
extension_and_record_labels = [
("symbol_status", "symbol_status"),
("approved_name", "label"),
Expand All @@ -433,9 +441,7 @@ def _add_gene(
]
for ext_label, record_label in extension_and_record_labels:
if record.get(record_label):
extensions.append(
entity_models.Extension(name=ext_label, value=record[record_label])
)
extensions.append(Extension(name=ext_label, value=record[record_label]))

record_locations = {}
if record["item_type"] == RecordType.IDENTITY:
Expand All @@ -455,16 +461,14 @@ def _add_gene(
]

if transformed_locs:
extensions.append(
entity_models.Extension(name=loc_name, value=transformed_locs)
)
extensions.append(Extension(name=loc_name, value=transformed_locs))

# handle gene types separately because they're wonky
if record["item_type"] == RecordType.IDENTITY:
gene_type = record.get("gene_type")
if gene_type:
extensions.append(
entity_models.Extension(
Extension(
name=GeneTypeFieldName[record["src_name"].upper()].value,
value=gene_type,
)
Expand All @@ -474,8 +478,7 @@ def _add_gene(
field_name = f.value
values = record.get(field_name, [])
extensions.extend(
entity_models.Extension(name=field_name, value=value)
for value in values
Extension(name=field_name, value=value) for value in values
)
if extensions:
gene_obj.extensions = extensions
Expand Down
8 changes: 4 additions & 4 deletions src/gene/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from enum import Enum, IntEnum
from typing import Annotated, Literal

from ga4gh.core import domain_models
from ga4gh.vrs import models
from ga4gh.core.models import MappableConcept
from ga4gh.vrs.models import SequenceLocation
from pydantic import (
BaseModel,
ConfigDict,
Expand Down Expand Up @@ -85,7 +85,7 @@ class BaseGene(BaseModel):
label: StrictStr | None = None
strand: Strand | None = None
location_annotations: list[StrictStr] = []
locations: list[models.SequenceLocation] | list[GeneSequenceLocation] = []
locations: list[SequenceLocation] | list[GeneSequenceLocation] = []
aliases: list[StrictStr] = []
previous_symbols: list[StrictStr] = []
xrefs: list[Annotated[str, StringConstraints(pattern=CURIE_REGEX)]] = []
Expand Down Expand Up @@ -301,7 +301,7 @@ class NormalizeService(BaseNormalizationService):
"""Define model for returning normalized concept."""

normalized_id: str | None = None
gene: domain_models.Gene | None = None
gene: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] = {}

model_config = ConfigDict(
Expand Down
Loading

0 comments on commit 1ee3408

Please sign in to comment.