Skip to content

Commit

Permalink
Merge branch 'interface-updates-epic' into cli-refactor
Browse files: browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Nov 25, 2023
2 parents c1db899 + fd2ae77 commit eb396f4
Show file tree
Hide file tree
Showing 17 changed files with 160 additions and 190 deletions.
2 changes: 1 addition & 1 deletion docs/scripts/generate_normalize_figure.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

import gravis as gv

from gene import APP_ROOT
from gene.database import create_db
from gene.etl.base import APP_ROOT
from gene.query import QueryHandler
from gene.schemas import UnmergedNormalizationService

Expand Down
65 changes: 2 additions & 63 deletions src/gene/__init__.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,4 @@
"""The VICC library for normalizing genes."""
import logging
from os import environ
from pathlib import Path
from .version import __version__

from .version import __version__ # noqa: F401

APP_ROOT = Path(__file__).resolve().parent

logging.basicConfig(
filename="gene.log", format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s"
)
logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
logger.handlers = []

logging.getLogger("boto3").setLevel(logging.INFO)
logging.getLogger("botocore").setLevel(logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger("python_jsonschema_objects").setLevel(logging.INFO)
logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO)
logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO)


SEQREPO_ROOT_DIR = Path(
environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
)


class DownloadException(Exception): # noqa: N818
"""Exception for failures relating to source file downloads."""


from gene.schemas import ( # noqa: E402
NamespacePrefix,
RefType,
SourceIDAfterNamespace,
SourceName,
)

ITEM_TYPES = {k.lower(): v.value for k, v in RefType.__members__.items()}

# Sources we import directly (HGNC, Ensembl, NCBI)
SOURCES = {
source.value.lower(): source.value for source in SourceName.__members__.values()
}

# Set of sources we import directly
XREF_SOURCES = {src.lower() for src in SourceName.__members__}

# use to fetch source name from schema based on concept id namespace
# e.g. {"hgnc": "HGNC"}
PREFIX_LOOKUP = {
v.value: SourceName[k].value
for k, v in NamespacePrefix.__members__.items()
if k in SourceName.__members__.keys()
}

# use to generate namespace prefix from source ID value
# e.g. {"ensg": "ensembl"}
NAMESPACE_LOOKUP = {
v.value.lower(): NamespacePrefix[k].value
for k, v in SourceIDAfterNamespace.__members__.items()
if v.value != ""
}
__all__ = ["__version__"]
10 changes: 8 additions & 2 deletions src/gene/database/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError

from gene import ITEM_TYPES, PREFIX_LOOKUP
from gene.database.database import (
AWS_ENV_VAR_NAME,
SKIP_AWS_DB_ENV_NAME,
Expand All @@ -23,7 +22,14 @@
DatabaseWriteException,
confirm_aws_db_use,
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName
from gene.schemas import (
ITEM_TYPES,
PREFIX_LOOKUP,
RecordType,
RefType,
SourceMeta,
SourceName,
)

logger = logging.getLogger(__name__)

Expand Down
11 changes: 8 additions & 3 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shutil
from abc import ABC, abstractmethod
from ftplib import FTP
from os import remove
from os import environ, remove
from pathlib import Path
from typing import Callable, Dict, List, Optional

Expand All @@ -15,14 +15,19 @@
from dateutil import parser
from gffutils.feature import Feature

from gene import ITEM_TYPES, SEQREPO_ROOT_DIR
from gene.database import AbstractDatabase
from gene.schemas import Gene, GeneSequenceLocation, MatchType, SourceName
from gene.schemas import ITEM_TYPES, Gene, GeneSequenceLocation, MatchType, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)


APP_ROOT = Path(__file__).resolve().parent
SEQREPO_ROOT_DIR = Path(
environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
)


class Base(ABC):
"""The ETL base class."""

Expand Down
3 changes: 1 addition & 2 deletions src/gene/etl/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
import requests
from gffutils.feature import Feature

from gene import APP_ROOT
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
Expand Down
4 changes: 2 additions & 2 deletions src/gene/etl/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

from dateutil import parser

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
GeneSourceFetchError,
)
from gene.schemas import (
PREFIX_LOOKUP,
Annotation,
Chromosome,
NamespacePrefix,
Expand Down
4 changes: 2 additions & 2 deletions src/gene/etl/ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

import gffutils

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
GeneSourceFetchError,
)
from gene.schemas import (
PREFIX_LOOKUP,
Annotation,
Chromosome,
NamespacePrefix,
Expand Down
53 changes: 30 additions & 23 deletions src/gene/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@

from gene import __version__
from gene.database import create_db
from gene.query import InvalidParameterException, QueryHandler
from gene.schemas import NormalizeService, SearchService, UnmergedNormalizationService
from gene.query import QueryHandler
from gene.schemas import (
SOURCES,
NormalizeService,
SearchService,
SourceName,
UnmergedNormalizationService,
)

db = create_db()
query_handler = QueryHandler(db)
Expand Down Expand Up @@ -42,14 +48,10 @@
read_query_summary = "Given query, provide best-matching source records."
response_description = "A response to a validly-formed query"
q_descr = "Gene to normalize."
incl_descr = """Optional. Comma-separated list of source names to include in
response. Will exclude all other sources. Returns HTTP status code
422: Unprocessable Entity if both 'incl' and 'excl' parameters
are given."""
excl_descr = """Optional. Comma-separated list of source names to exclude in
response. Will include all other sources. Returns HTTP status
code 422: Unprocessable Entity if both 'incl' and 'excl'
parameters are given."""
sources_descr = (
"Optional. Comma-separated list of source names to include in response, if given. "
"Will exclude all other sources."
)
search_description = (
"For each source, return strongest-match concepts "
"for query string provided by user"
Expand All @@ -66,24 +68,29 @@
)
def search(
q: str = Query(..., description=q_descr), # noqa: D103
incl: Optional[str] = Query(None, description=incl_descr),
excl: Optional[str] = Query(None, description=excl_descr),
sources: Optional[str] = Query(None, description=sources_descr),
) -> SearchService:
"""Return strongest match concepts to query string provided by user.
:param str q: gene search term
:param Optional[str] incl: comma-separated list of sources to include,
with all others excluded. Raises HTTPException if both `incl` and
`excl` are given.
:param Optional[str] excl: comma-separated list of sources to exclude, with
all others included. Raises HTTPException if both `incl` and `excl`
are given.
:param q: gene search term
:param sources: If given, search only for records from these sources.
Provide as string of source names separated by commas.
:return: JSON response with matched records and source metadata
"""
try:
resp = query_handler.search(html.unescape(q), incl=incl, excl=excl)
except InvalidParameterException as e:
raise HTTPException(status_code=422, detail=str(e))
parsed_sources = []
if sources:
for candidate_source in sources.split(","):
try:
parsed_source = SourceName[
SOURCES[candidate_source.strip().lower()].upper()
]
except KeyError:
raise HTTPException(
status_code=422,
detail=f"Unable to parse source name: {candidate_source}",
)
parsed_sources.append(parsed_source)
resp = query_handler.search(html.unescape(q), sources=parsed_sources)
return resp


Expand Down
Loading

0 comments on commit eb396f4

Please sign in to comment.