diff --git a/README.md b/README.md index ab4da8af..f6202147 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Gene Normalizer ## Overview -The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from [NCBI Gene](https://www.ncbi.nlm.nih.gov/gene/), [Ensembl](https://useast.ensembl.org/index.html), and [HGNC](https://www.genenames.org/), it designates a [CURIE](https://en.wikipedia.org/wiki/CURIE), and provides additional metadata like current and previously-used symbols, aliases, database cross-references and associations, and coordinates. +The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from [NCBI Gene](https://www.ncbi.nlm.nih.gov/gene/), [Ensembl](https://useast.ensembl.org/index.html), and [HGNC](https://www.genenames.org/), it designates a [CURIE](https://en.wikipedia.org/wiki/CURIE), and provides additional metadata like current and previously-used symbols, aliases, database cross-references, and coordinates. --- diff --git a/docs/scripts/generate_normalize_figure.py b/docs/scripts/generate_normalize_figure.py index bb05c7c1..54696855 100644 --- a/docs/scripts/generate_normalize_figure.py +++ b/docs/scripts/generate_normalize_figure.py @@ -111,7 +111,7 @@ def gen_norm_figure() -> None: ) fig.export_html( ( - APP_ROOT.parents[0] + APP_ROOT.parents[2] / "docs" / "source" / "_static" diff --git a/docs/source/_static/html/normalize_example.html b/docs/source/_static/html/normalize_example.html index b22e5b3a..dc09c719 100644 --- a/docs/source/_static/html/normalize_example.html +++ b/docs/source/_static/html/normalize_example.html @@ -6,25 +6,25 @@ -
-
+
+
-
-
-
-
-
+
+
+
+
+
Details for selected element
-
+
-
-
+
+
-
+
General
-
+
-
+
App state
-
-
-
+
Display mode
-
-
-
+
Export
-
- - -
-
+
Data selection
-
+
-
-
+
+
Graph
-
- +
+
-
+
Node label text
-
- +
+
-
+
Edge label text
-
- +
+
-
+
Node size
-
+
- +
-
- + - +
-
+
- Minimum - - Minimum + +
- Maximum - - Maximum + +
-
+
Edge size
-
+
- +
-
- + - +
-
+
- Minimum - - Minimum + +
- Maximum - - Maximum + +
-
+
Nodes
-
+
-
+
Visibility
-
-
- +
+ - +
-
+
Size
-
+
- Scaling factor - - Scaling factor + +
-
+
Position
-
-
-
+
Drag behavior
-
-
- +
+ - +
-
+
Hover behavior
-
-
- +
+ - +
-
- + - +
-
-
+
+
Node images
-
+
-
+
Visibility
-
-
- +
+ - +
-
+
Size
-
- Scaling factor - - + Scaling factor + +
-
+
Node labels
-
+
-
+
Visibility
-
-
- +
+ - +
-
- + - +
-
+
Size
-
- Scaling factor - - + Scaling factor + +
-
-
+
+
Rotation
-
- Angle - - + Angle + +
-
+
Edges
-
+
-
+
Visibility
-
-
- +
+ - +
-
+
Size
-
- Scaling factor - - + Scaling factor + +
-
+
Form
-
- Curvature - - + Curvature + +
-
+
Hover behavior
-
-
- +
+ - +
-
+
Edge labels
-
+
-
+
Visibility
-
-
- +
+ - +
-
- + - +
-
+
Size
-
- Scaling factor - - + Scaling factor + +
-
-
+
+
Rotation
-
- Angle - - + Angle + +
-
+
Layout algorithm
-
+
-
+
Simulation
-
-
- +
+ - +
-
+
Many-body force
-
-
- +
+ - +
-
+
- Strength - - Strength + +
- Theta - - Theta + +
-
- + - +
-
- Min - - + Min + +
-
- + - +
-
- Max - - + Max + +
@@ -891,33 +891,33 @@
-
+
Links force
-
-
- +
+ - +
-
-
+
Collision force
-
-
- +
+ - +
-
+
- Radius - - Radius + +
- Strength - - Strength + +
@@ -958,24 +958,24 @@
-
+
x-positioning force
-
-
- +
+ - +
-
+
- Strength - - Strength + +
@@ -983,24 +983,24 @@
-
+
y-positioning force
-
-
- +
+ - +
-
+
- Strength - - Strength + +
@@ -1008,16 +1008,16 @@
-
+
Centering force
-
-
- +
+ - +
@@ -1070,7 +1070,7 @@ // 1) Fetch state.rawData fetchRawDataFromTemplating(){ - state.rawData = [{"label": "Reference network for OTX2P1 and OTX2P2", "metadata": {"arrow_size": 15, "node_size": 15, "node_label_size": 20, "edge_size": 2}, "nodes": {"hgnc:33281": {"metadata": {"color": "#F8766D", "hover": "hgnc:33281\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"hgnc:33281\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": \"approved\",\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": null,\n \"location_annotations\": [],\n \"locations\": [],\n \"aliases\": [],\n \"previous_symbols\": [\n \"OTX2P\"\n ],\n \"xrefs\": [\n \"ncbigene:100033409\",\n \"ensembl:ENSG00000234644\"\n ],\n \"associated_with\": [\n \"refseq:NG_032194\",\n \"vega:OTTHUMG00000020037\",\n \"homeodb:8593\",\n \"pseudogene.org:PGOHUM00000303938\"\n ],\n \"gene_type\": \"pseudogene\"\n}

"}}, "ncbigene:100033409": {"metadata": {"color": "#00BA38", "hover": "ncbigene:100033409\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"ncbigene:100033409\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": \"-\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.nfBggPI8ffYAIbf7DKA8IcL95VNeUzQ6\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI\",\n \"residueAlphabet\": null\n },\n \"start\": 75724170,\n \"end\": 75724811\n }\n ],\n \"aliases\": [\n \"OTX2P\"\n ],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:33281\"\n ],\n \"associated_with\": [],\n \"gene_type\": \"pseudo\"\n}

"}}, "ensembl:ENSG00000234644": {"metadata": {"color": "#00B9E3", "hover": "ensembl:ENSG00000234644\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"ensembl:ENSG00000234644\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": \"-\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL._CHxr4orI5x27KUc2VPGa0JnGJ0MEMms\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI\",\n \"residueAlphabet\": null\n },\n \"start\": 75724221,\n \"end\": 75724575\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:33281\"\n ],\n \"associated_with\": [],\n \"gene_type\": \"processed_pseudogene\"\n}

"}}, "hgnc:54560": {"metadata": {"color": "#F8766D", "hover": "hgnc:54560\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"hgnc:54560\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": \"approved\",\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": null,\n \"location_annotations\": [],\n \"locations\": [],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"ncbigene:100419816\",\n \"ensembl:ENSG00000227134\"\n ],\n \"associated_with\": [\n \"refseq:NG_023739\"\n ],\n \"gene_type\": \"pseudogene\"\n}

"}}, "ensembl:ENSG00000227134": {"metadata": {"color": "#00BA38", "hover": "ensembl:ENSG00000227134\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"ensembl:ENSG00000227134\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": \"+\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.JvQZZy4EBCYE2uCSnDfeOpAau8XfbIlq\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g\",\n \"residueAlphabet\": null\n },\n \"start\": 146477426,\n \"end\": 146478113\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:54560\"\n ],\n \"associated_with\": [],\n \"gene_type\": \"processed_pseudogene\"\n}

"}}, "ncbigene:100419816": {"metadata": {"color": "#00B9E3", "hover": "ncbigene:100419816\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"ncbigene:100419816\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": \"+\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.9zWoRFYc-hnhG5xUfCxB2hU_JTj3Bw3Z\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g\",\n \"residueAlphabet\": null\n },\n \"start\": 146477354,\n \"end\": 146478110\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"ensembl:ENSG00000227134\",\n \"hgnc:54560\"\n ],\n \"associated_with\": [],\n \"gene_type\": \"pseudo\"\n}

"}}}, "edges": [{"source": "hgnc:33281", "target": "ncbigene:100033409"}, {"source": "hgnc:33281", "target": "ensembl:ENSG00000234644"}, {"source": "ncbigene:100033409", "target": "hgnc:33281"}, {"source": "ensembl:ENSG00000234644", "target": "hgnc:33281"}, {"source": "hgnc:54560", "target": "ncbigene:100419816"}, {"source": "hgnc:54560", "target": "ensembl:ENSG00000227134"}, {"source": "ensembl:ENSG00000227134", "target": "hgnc:54560"}, {"source": "ncbigene:100419816", "target": "ensembl:ENSG00000227134"}, {"source": "ncbigene:100419816", "target": "hgnc:54560"}]}]; + state.rawData = [{"label": "Reference network for OTX2P1 and OTX2P2", "metadata": {"arrow_size": 15, "node_size": 15, "node_label_size": 20, "edge_size": 2}, "nodes": {"hgnc:33281": {"metadata": {"color": "#F8766D", "hover": "hgnc:33281\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"hgnc:33281\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": \"approved\",\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": null,\n \"location_annotations\": [],\n \"locations\": [],\n \"aliases\": [],\n \"previous_symbols\": [\n \"OTX2P\"\n ],\n \"xrefs\": [\n \"vega:OTTHUMG00000020037\",\n \"refseq:NG_032194\",\n \"homeodb:8593\",\n \"pseudogene.org:PGOHUM00000303938\",\n \"ensembl:ENSG00000234644\",\n \"ncbigene:100033409\"\n ],\n \"gene_type\": \"pseudogene\"\n}

"}}, "ensembl:ENSG00000234644": {"metadata": {"color": "#00BA38", "hover": "ensembl:ENSG00000234644\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"ensembl:ENSG00000234644\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": \"-\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL._CHxr4orI5x27KUc2VPGa0JnGJ0MEMms\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI\",\n \"residueAlphabet\": null\n },\n \"start\": 75724221,\n \"end\": 75724575\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:33281\"\n ],\n \"gene_type\": \"processed_pseudogene\"\n}

"}}, "ncbigene:100033409": {"metadata": {"color": "#00B9E3", "hover": "ncbigene:100033409\nOTX2P1\nOTX2 pseudogene 1", "click": "

{\n \"concept_id\": \"ncbigene:100033409\",\n \"symbol\": \"OTX2P1\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 1\",\n \"strand\": \"-\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.nfBggPI8ffYAIbf7DKA8IcL95VNeUzQ6\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI\",\n \"residueAlphabet\": null\n },\n \"start\": 75724170,\n \"end\": 75724811\n }\n ],\n \"aliases\": [\n \"OTX2P\"\n ],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:33281\"\n ],\n \"gene_type\": \"pseudo\"\n}

"}}, "hgnc:54560": {"metadata": {"color": "#F8766D", "hover": "hgnc:54560\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"hgnc:54560\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": \"approved\",\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": null,\n \"location_annotations\": [],\n \"locations\": [],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"refseq:NG_023739\",\n \"ncbigene:100419816\",\n \"ensembl:ENSG00000227134\"\n ],\n \"gene_type\": \"pseudogene\"\n}

"}}, "ncbigene:100419816": {"metadata": {"color": "#00BA38", "hover": "ncbigene:100419816\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"ncbigene:100419816\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": \"+\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.9zWoRFYc-hnhG5xUfCxB2hU_JTj3Bw3Z\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g\",\n \"residueAlphabet\": null\n },\n \"start\": 146477354,\n \"end\": 146478110\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"ensembl:ENSG00000227134\",\n \"hgnc:54560\"\n ],\n \"gene_type\": \"pseudo\"\n}

"}}, "ensembl:ENSG00000227134": {"metadata": {"color": "#00B9E3", "hover": "ensembl:ENSG00000227134\nOTX2P2\nOTX2 pseudogene 2", "click": "

{\n \"concept_id\": \"ensembl:ENSG00000227134\",\n \"symbol\": \"OTX2P2\",\n \"symbol_status\": null,\n \"label\": \"OTX2 pseudogene 2\",\n \"strand\": \"+\",\n \"location_annotations\": [],\n \"locations\": [\n {\n \"id\": \"ga4gh:SL.JvQZZy4EBCYE2uCSnDfeOpAau8XfbIlq\",\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceLocation\",\n \"sequenceReference\": {\n \"id\": null,\n \"label\": null,\n \"description\": null,\n \"extensions\": null,\n \"digest\": null,\n \"type\": \"SequenceReference\",\n \"refgetAccession\": \"SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g\",\n \"residueAlphabet\": null\n },\n \"start\": 146477426,\n \"end\": 146478113\n }\n ],\n \"aliases\": [],\n \"previous_symbols\": [],\n \"xrefs\": [\n \"hgnc:54560\"\n ],\n \"gene_type\": \"processed_pseudogene\"\n}

"}}}, "edges": [{"source": "hgnc:33281", "target": "ensembl:ENSG00000234644"}, {"source": "hgnc:33281", "target": "ncbigene:100033409"}, {"source": "ensembl:ENSG00000234644", "target": "hgnc:33281"}, {"source": "ncbigene:100033409", "target": "hgnc:33281"}, {"source": "hgnc:54560", "target": "ncbigene:100419816"}, {"source": "hgnc:54560", "target": "ensembl:ENSG00000227134"}, {"source": "ncbigene:100419816", "target": "ensembl:ENSG00000227134"}, {"source": "ncbigene:100419816", "target": "hgnc:54560"}, {"source": "ensembl:ENSG00000227134", "target": "hgnc:54560"}]}]; // Data selection and normalization state.nodeSizeDataSource = "size"; state.useNodeSizeNormalization = false; @@ -2000,126 +2000,126 @@ elements:{ // Containers - mainContainer: document.getElementById("i8EK4UFXUqNxcNnZd-main-div"), - tooltipContainer: document.getElementById("i8EK4UFXUqNxcNnZd-tooltip-div"), - leftContainer: document.getElementById("i8EK4UFXUqNxcNnZd-left-div"), - rightContainer: document.getElementById("i8EK4UFXUqNxcNnZd-right-div"), - graphContainer: document.getElementById("i8EK4UFXUqNxcNnZd-graph-div"), - detailsContainer: document.getElementById("i8EK4UFXUqNxcNnZd-details-div"), - detailsHead: document.getElementById("i8EK4UFXUqNxcNnZd-details-head"), - detailsBody: document.getElementById("i8EK4UFXUqNxcNnZd-details-body"), + mainContainer: document.getElementById("imev8OqzlOAPljR9j-main-div"), + tooltipContainer: document.getElementById("imev8OqzlOAPljR9j-tooltip-div"), + leftContainer: document.getElementById("imev8OqzlOAPljR9j-left-div"), + rightContainer: document.getElementById("imev8OqzlOAPljR9j-right-div"), + graphContainer: document.getElementById("imev8OqzlOAPljR9j-graph-div"), + detailsContainer: document.getElementById("imev8OqzlOAPljR9j-details-div"), + detailsHead: document.getElementById("imev8OqzlOAPljR9j-details-head"), + detailsBody: document.getElementById("imev8OqzlOAPljR9j-details-body"), // Data sources - dataHead: document.getElementById("i8EK4UFXUqNxcNnZd-data-head"), - dataBody: document.getElementById("i8EK4UFXUqNxcNnZd-data-body"), - graphSelectionContainer: document.getElementById("i8EK4UFXUqNxcNnZd-graph-select-div"), - graphSelection: document.getElementById("i8EK4UFXUqNxcNnZd-graph-select"), - nodeSizeDataSourceSelect: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-data-source-select"), - nodeSizeNormalizationCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-normalization-checkbox"), - nodeSizeNormalizationContainer: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-norm-div"), - nodeSizeNormalizationMinText: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-normalization-min-text"), - nodeSizeNormalizationMinSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-normalization-min-slider"), - nodeSizeNormalizationMaxText: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-normalization-max-text"), - nodeSizeNormalizationMaxSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-normalization-max-slider"), - edgeSizeDataSourceSelect: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-data-source-select"), - edgeSizeNormalizationCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-normalization-checkbox"), - edgeSizeNormalizationContainer: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-norm-div"), - edgeSizeNormalizationMinText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-normalization-min-text"), - edgeSizeNormalizationMinSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-normalization-min-slider"), - edgeSizeNormalizationMaxText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-normalization-max-text"), - edgeSizeNormalizationMaxSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-normalization-max-slider"), + dataHead: document.getElementById("imev8OqzlOAPljR9j-data-head"), + dataBody: document.getElementById("imev8OqzlOAPljR9j-data-body"), + graphSelectionContainer: document.getElementById("imev8OqzlOAPljR9j-graph-select-div"), + graphSelection: document.getElementById("imev8OqzlOAPljR9j-graph-select"), + nodeSizeDataSourceSelect: document.getElementById("imev8OqzlOAPljR9j-node-size-data-source-select"), + nodeSizeNormalizationCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-size-normalization-checkbox"), + nodeSizeNormalizationContainer: document.getElementById("imev8OqzlOAPljR9j-node-size-norm-div"), + nodeSizeNormalizationMinText: document.getElementById("imev8OqzlOAPljR9j-node-size-normalization-min-text"), + nodeSizeNormalizationMinSlider: document.getElementById("imev8OqzlOAPljR9j-node-size-normalization-min-slider"), + nodeSizeNormalizationMaxText: document.getElementById("imev8OqzlOAPljR9j-node-size-normalization-max-text"), + nodeSizeNormalizationMaxSlider: document.getElementById("imev8OqzlOAPljR9j-node-size-normalization-max-slider"), + edgeSizeDataSourceSelect: document.getElementById("imev8OqzlOAPljR9j-edge-size-data-source-select"), + edgeSizeNormalizationCheckbox: document.getElementById("imev8OqzlOAPljR9j-edge-size-normalization-checkbox"), + edgeSizeNormalizationContainer: document.getElementById("imev8OqzlOAPljR9j-edge-size-norm-div"), + edgeSizeNormalizationMinText: document.getElementById("imev8OqzlOAPljR9j-edge-size-normalization-min-text"), + edgeSizeNormalizationMinSlider: document.getElementById("imev8OqzlOAPljR9j-edge-size-normalization-min-slider"), + edgeSizeNormalizationMaxText: document.getElementById("imev8OqzlOAPljR9j-edge-size-normalization-max-text"), + edgeSizeNormalizationMaxSlider: document.getElementById("imev8OqzlOAPljR9j-edge-size-normalization-max-slider"), // General - generalHead: document.getElementById("i8EK4UFXUqNxcNnZd-general-head"), - generalBody: document.getElementById("i8EK4UFXUqNxcNnZd-general-body"), - resetButton: document.getElementById("i8EK4UFXUqNxcNnZd-reset"), - fullscreenButton: document.getElementById("i8EK4UFXUqNxcNnZd-fullscreen-button"), - svgExportButton: document.getElementById("i8EK4UFXUqNxcNnZd-svg"), - pngExportButton: document.getElementById("i8EK4UFXUqNxcNnZd-png"), - jpgExportButton: document.getElementById("i8EK4UFXUqNxcNnZd-jpg"), + generalHead: document.getElementById("imev8OqzlOAPljR9j-general-head"), + generalBody: document.getElementById("imev8OqzlOAPljR9j-general-body"), + resetButton: document.getElementById("imev8OqzlOAPljR9j-reset"), + fullscreenButton: document.getElementById("imev8OqzlOAPljR9j-fullscreen-button"), + svgExportButton: document.getElementById("imev8OqzlOAPljR9j-svg"), + pngExportButton: document.getElementById("imev8OqzlOAPljR9j-png"), + jpgExportButton: document.getElementById("imev8OqzlOAPljR9j-jpg"), // Nodes - nodeHead: document.getElementById("i8EK4UFXUqNxcNnZd-node-head"), - nodeBody: document.getElementById("i8EK4UFXUqNxcNnZd-node-body"), - nodeCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-checkbox"), - nodeSizeFactorText: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-factor-text"), - nodeSizeFactorSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-size-factor-slider"), - nodeDragFixCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-drag-fix-checkbox"), - nodeHoverNeighborhoodCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-hover-neighborhood-checkbox"), - nodeHoverTooltipCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-hover-tooltip-checkbox"), - nodeReleaseButton: document.getElementById("i8EK4UFXUqNxcNnZd-node-release-button"), + nodeHead: document.getElementById("imev8OqzlOAPljR9j-node-head"), + nodeBody: document.getElementById("imev8OqzlOAPljR9j-node-body"), + nodeCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-checkbox"), + nodeSizeFactorText: document.getElementById("imev8OqzlOAPljR9j-node-size-factor-text"), + nodeSizeFactorSlider: document.getElementById("imev8OqzlOAPljR9j-node-size-factor-slider"), + nodeDragFixCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-drag-fix-checkbox"), + nodeHoverNeighborhoodCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-hover-neighborhood-checkbox"), + nodeHoverTooltipCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-hover-tooltip-checkbox"), + nodeReleaseButton: document.getElementById("imev8OqzlOAPljR9j-node-release-button"), // Node images - nodeImageHead: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-head"), - nodeImageBody: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-body"), - nodeImageCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-checkbox"), - nodeImageMetaControl: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-meta-control"), - nodeImageSizeFactorText: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-size-factor-text"), - nodeImageSizeFactorSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-image-size-factor-slider"), + nodeImageHead: document.getElementById("imev8OqzlOAPljR9j-node-image-head"), + nodeImageBody: document.getElementById("imev8OqzlOAPljR9j-node-image-body"), + nodeImageCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-image-checkbox"), + nodeImageMetaControl: document.getElementById("imev8OqzlOAPljR9j-node-image-meta-control"), + nodeImageSizeFactorText: document.getElementById("imev8OqzlOAPljR9j-node-image-size-factor-text"), + nodeImageSizeFactorSlider: document.getElementById("imev8OqzlOAPljR9j-node-image-size-factor-slider"), // Node labels - nodeLabelHead: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-head"), - nodeLabelBody: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-body"), - nodeLabelCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-checkbox"), - nodeLabelBorderCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-border-checkbox"), - nodeLabelTextDataSourceSelect: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-data-source-select"), - nodeLabelSizeFactorText: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-size-factor-text"), - nodeLabelSizeFactorSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-size-factor-slider"), - nodeLabelRotationText: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-rotation-text"), - nodeLabelRotationSlider: document.getElementById("i8EK4UFXUqNxcNnZd-node-label-rotation-slider"), + nodeLabelHead: document.getElementById("imev8OqzlOAPljR9j-node-label-head"), + nodeLabelBody: document.getElementById("imev8OqzlOAPljR9j-node-label-body"), + nodeLabelCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-label-checkbox"), + nodeLabelBorderCheckbox: document.getElementById("imev8OqzlOAPljR9j-node-label-border-checkbox"), + nodeLabelTextDataSourceSelect: document.getElementById("imev8OqzlOAPljR9j-node-label-data-source-select"), + nodeLabelSizeFactorText: document.getElementById("imev8OqzlOAPljR9j-node-label-size-factor-text"), + nodeLabelSizeFactorSlider: document.getElementById("imev8OqzlOAPljR9j-node-label-size-factor-slider"), + nodeLabelRotationText: document.getElementById("imev8OqzlOAPljR9j-node-label-rotation-text"), + nodeLabelRotationSlider: document.getElementById("imev8OqzlOAPljR9j-node-label-rotation-slider"), // Edges - edgeHead: document.getElementById("i8EK4UFXUqNxcNnZd-edge-head"), - edgeBody: document.getElementById("i8EK4UFXUqNxcNnZd-edge-body"), - edgeCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-edge-checkbox"), - edgeSizeFactorText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-factor-text"), - edgeSizeFactorSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-size-factor-slider"), - edgeCurvatureText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-curvature-text"), - edgeCurvatureSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-curvature-slider"), - edgeHoverTooltipCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-edge-hover-tooltip-checkbox"), + edgeHead: document.getElementById("imev8OqzlOAPljR9j-edge-head"), + edgeBody: document.getElementById("imev8OqzlOAPljR9j-edge-body"), + edgeCheckbox: document.getElementById("imev8OqzlOAPljR9j-edge-checkbox"), + edgeSizeFactorText: document.getElementById("imev8OqzlOAPljR9j-edge-size-factor-text"), + edgeSizeFactorSlider: document.getElementById("imev8OqzlOAPljR9j-edge-size-factor-slider"), + edgeCurvatureText: document.getElementById("imev8OqzlOAPljR9j-edge-curvature-text"), + edgeCurvatureSlider: document.getElementById("imev8OqzlOAPljR9j-edge-curvature-slider"), + edgeHoverTooltipCheckbox: document.getElementById("imev8OqzlOAPljR9j-edge-hover-tooltip-checkbox"), // Edge labels - edgeLabelHead: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-head"), - edgeLabelBody: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-body"), - edgeLabelCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-checkbox"), - edgeLabelBorderCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-border-checkbox"), - edgeLabelTextDataSourceSelect: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-data-source-select"), - edgeLabelSizeFactorText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-size-factor-text"), - edgeLabelSizeFactorSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-size-factor-slider"), - edgeLabelRotationText: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-rotation-text"), - edgeLabelRotationSlider: document.getElementById("i8EK4UFXUqNxcNnZd-edge-label-rotation-slider"), + edgeLabelHead: document.getElementById("imev8OqzlOAPljR9j-edge-label-head"), + edgeLabelBody: document.getElementById("imev8OqzlOAPljR9j-edge-label-body"), + edgeLabelCheckbox: document.getElementById("imev8OqzlOAPljR9j-edge-label-checkbox"), + edgeLabelBorderCheckbox: document.getElementById("imev8OqzlOAPljR9j-edge-label-border-checkbox"), + edgeLabelTextDataSourceSelect: document.getElementById("imev8OqzlOAPljR9j-edge-label-data-source-select"), + edgeLabelSizeFactorText: document.getElementById("imev8OqzlOAPljR9j-edge-label-size-factor-text"), + edgeLabelSizeFactorSlider: document.getElementById("imev8OqzlOAPljR9j-edge-label-size-factor-slider"), + edgeLabelRotationText: document.getElementById("imev8OqzlOAPljR9j-edge-label-rotation-text"), + edgeLabelRotationSlider: document.getElementById("imev8OqzlOAPljR9j-edge-label-rotation-slider"), // Layout algorithm - layoutAlgorithmHead: document.getElementById("i8EK4UFXUqNxcNnZd-layout-algorithm-head"), - layoutAlgorithmBody: document.getElementById("i8EK4UFXUqNxcNnZd-layout-algorithm-body"), - simulationCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-simulation-active-checkbox"), - manyBodyForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-checkbox"), - manyBodyForceContainer: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-div"), - manyBodyForceStrengthText: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-strength-text"), - manyBodyForceStrengthSlider: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-strength-slider"), - manyBodyForceThetaText: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-theta-text"), - manyBodyForceThetaSlider: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-theta-slider"), - manyBodyForceMinDistCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-min-distance-checkbox"), - manyBodyForceMinDistContainer: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-min-distance-div"), - manyBodyForceMinDistText: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-min-distance-text"), - manyBodyForceMinDistSlider: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-min-distance-slider"), - manyBodyForceMaxDistCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-max-distance-checkbox"), - manyBodyForceMaxDistContainer: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-max-distance-div"), - manyBodyForceMaxDistText: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-max-distance-text"), - manyBodyForceMaxDistSlider: document.getElementById("i8EK4UFXUqNxcNnZd-many-body-force-max-distance-slider"), - linksForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-checkbox"), - linksForceContainer: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-div"), - linksForceDistanceText: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-distance-text"), - linksForceDistanceSlider: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-distance-slider"), - linksForceStrengthText: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-strength-text"), - linksForceStrengthSlider: document.getElementById("i8EK4UFXUqNxcNnZd-links-force-strength-slider"), - collisionForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-checkbox"), - collisionForceContainer: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-div"), - collisionForceRadiusText: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-radius-text"), - collisionForceRadiusSlider: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-radius-slider"), - collisionForceStrengthText: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-strength-text"), - collisionForceStrengthSlider: document.getElementById("i8EK4UFXUqNxcNnZd-collision-force-strength-slider"), - xPositioningForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-x-positioning-force-checkbox"), - xPositioningForceContainer: document.getElementById("i8EK4UFXUqNxcNnZd-x-positioning-force-div"), - xPositioningForceStrengthText: document.getElementById("i8EK4UFXUqNxcNnZd-x-positioning-force-strength-text"), - xPositioningForceStrengthSlider: document.getElementById("i8EK4UFXUqNxcNnZd-x-positioning-force-strength-slider"), - yPositioningForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-y-positioning-force-checkbox"), - yPositioningForceContainer: document.getElementById("i8EK4UFXUqNxcNnZd-y-positioning-force-div"), - yPositioningForceStrengthText: document.getElementById("i8EK4UFXUqNxcNnZd-y-positioning-force-strength-text"), - yPositioningForceStrengthSlider: document.getElementById("i8EK4UFXUqNxcNnZd-y-positioning-force-strength-slider"), - centeringForceCheckbox: document.getElementById("i8EK4UFXUqNxcNnZd-centering-force-checkbox"), + layoutAlgorithmHead: document.getElementById("imev8OqzlOAPljR9j-layout-algorithm-head"), + layoutAlgorithmBody: document.getElementById("imev8OqzlOAPljR9j-layout-algorithm-body"), + simulationCheckbox: document.getElementById("imev8OqzlOAPljR9j-simulation-active-checkbox"), + manyBodyForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-many-body-force-checkbox"), + manyBodyForceContainer: document.getElementById("imev8OqzlOAPljR9j-many-body-force-div"), + manyBodyForceStrengthText: document.getElementById("imev8OqzlOAPljR9j-many-body-force-strength-text"), + manyBodyForceStrengthSlider: document.getElementById("imev8OqzlOAPljR9j-many-body-force-strength-slider"), + manyBodyForceThetaText: document.getElementById("imev8OqzlOAPljR9j-many-body-force-theta-text"), + manyBodyForceThetaSlider: document.getElementById("imev8OqzlOAPljR9j-many-body-force-theta-slider"), + manyBodyForceMinDistCheckbox: document.getElementById("imev8OqzlOAPljR9j-many-body-force-min-distance-checkbox"), + manyBodyForceMinDistContainer: document.getElementById("imev8OqzlOAPljR9j-many-body-force-min-distance-div"), + manyBodyForceMinDistText: document.getElementById("imev8OqzlOAPljR9j-many-body-force-min-distance-text"), + manyBodyForceMinDistSlider: document.getElementById("imev8OqzlOAPljR9j-many-body-force-min-distance-slider"), + manyBodyForceMaxDistCheckbox: document.getElementById("imev8OqzlOAPljR9j-many-body-force-max-distance-checkbox"), + manyBodyForceMaxDistContainer: document.getElementById("imev8OqzlOAPljR9j-many-body-force-max-distance-div"), + manyBodyForceMaxDistText: document.getElementById("imev8OqzlOAPljR9j-many-body-force-max-distance-text"), + manyBodyForceMaxDistSlider: document.getElementById("imev8OqzlOAPljR9j-many-body-force-max-distance-slider"), + linksForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-links-force-checkbox"), + linksForceContainer: document.getElementById("imev8OqzlOAPljR9j-links-force-div"), + linksForceDistanceText: document.getElementById("imev8OqzlOAPljR9j-links-force-distance-text"), + linksForceDistanceSlider: document.getElementById("imev8OqzlOAPljR9j-links-force-distance-slider"), + linksForceStrengthText: document.getElementById("imev8OqzlOAPljR9j-links-force-strength-text"), + linksForceStrengthSlider: document.getElementById("imev8OqzlOAPljR9j-links-force-strength-slider"), + collisionForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-collision-force-checkbox"), + collisionForceContainer: document.getElementById("imev8OqzlOAPljR9j-collision-force-div"), + collisionForceRadiusText: document.getElementById("imev8OqzlOAPljR9j-collision-force-radius-text"), + collisionForceRadiusSlider: document.getElementById("imev8OqzlOAPljR9j-collision-force-radius-slider"), + collisionForceStrengthText: document.getElementById("imev8OqzlOAPljR9j-collision-force-strength-text"), + collisionForceStrengthSlider: document.getElementById("imev8OqzlOAPljR9j-collision-force-strength-slider"), + xPositioningForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-x-positioning-force-checkbox"), + xPositioningForceContainer: document.getElementById("imev8OqzlOAPljR9j-x-positioning-force-div"), + xPositioningForceStrengthText: document.getElementById("imev8OqzlOAPljR9j-x-positioning-force-strength-text"), + xPositioningForceStrengthSlider: document.getElementById("imev8OqzlOAPljR9j-x-positioning-force-strength-slider"), + yPositioningForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-y-positioning-force-checkbox"), + yPositioningForceContainer: document.getElementById("imev8OqzlOAPljR9j-y-positioning-force-div"), + yPositioningForceStrengthText: document.getElementById("imev8OqzlOAPljR9j-y-positioning-force-strength-text"), + yPositioningForceStrengthSlider: document.getElementById("imev8OqzlOAPljR9j-y-positioning-force-strength-slider"), + centeringForceCheckbox: document.getElementById("imev8OqzlOAPljR9j-centering-force-checkbox"), }, composites:{ @@ -2481,7 +2481,7 @@ create(){ // Main container this.mainContainer = document.createElement("div"); - this.mainContainer.id = "i8EK4UFXUqNxcNnZd-progress-container"; + this.mainContainer.id = "imev8OqzlOAPljR9j-progress-container"; this.mainContainer.style.backgroundColor = state.shownData.general.background_color; ui.elements.graphContainer.style.backgroundColor = state.shownData.general.background_color; // Text container @@ -2530,7 +2530,7 @@ } else { menuDiv.innerText = ui.symbols.menuHidden; } - menuDiv.id = "i8EK4UFXUqNxcNnZd-menu-toggle-button"; + menuDiv.id = "imev8OqzlOAPljR9j-menu-toggle-button"; menuDiv.onclick = ui.composites.menu.toggle; ui.elements.graphContainer.appendChild(menuDiv); ui.elements.menuToggleDiv = menuDiv; @@ -2544,27 +2544,27 @@ } else { detailsDiv.innerText = ui.symbols.detailsHidden; } - detailsDiv.id = "i8EK4UFXUqNxcNnZd-details-toggle-button"; + detailsDiv.id = "imev8OqzlOAPljR9j-details-toggle-button"; detailsDiv.onclick = ui.composites.details.toggle; ui.elements.graphContainer.appendChild(detailsDiv); ui.elements.detailsToggleDiv = detailsDiv; } // - Graph drawing area - const svg = d3.select("#i8EK4UFXUqNxcNnZd-graph-div").append("svg"); + const svg = d3.select("#imev8OqzlOAPljR9j-graph-div").append("svg"); state.currentGraphParts.svg = svg; svg .attr("width", state.graphContainerWidth) .attr("height", state.graphContainerHeight); // - Background rectangle const backgroundRect = svg.append("rect") - .attr("id", "i8EK4UFXUqNxcNnZd-background") + .attr("id", "imev8OqzlOAPljR9j-background") .attr("width", state.graphContainerWidth) .attr("height", state.graphContainerHeight) .attr("fill", state.shownData.general.background_color); state.currentGraphParts.backgroundRect = backgroundRect; // - Zoomable and draggable group as graph drawing area - const view = svg.append("g").attr("id", "i8EK4UFXUqNxcNnZd-zoomable-graph-group"); + const view = svg.append("g").attr("id", "imev8OqzlOAPljR9j-zoomable-graph-group"); state.currentGraphParts.view = view; function zoomed(event) { view.attr("transform", event.transform); @@ -2623,7 +2623,7 @@ // in back and which are drawn in front (there is no z-order property in SVG) // Edges state.currentGraphParts.edgeMainGroup = view.append("g") - .attr("id", "i8EK4UFXUqNxcNnZd-edge-group") + .attr("id", "imev8OqzlOAPljR9j-edge-group") .style("display", ui.convert.boolToDisplayStyle(state.showEdges)); state.currentGraphParts.edgeGroups = state.currentGraphParts.edgeMainGroup .selectAll("g") @@ -2631,7 +2631,7 @@ .join("g"); // Nodes state.currentGraphParts.nodeMainGroup = view.append("g") - .attr("id", "i8EK4UFXUqNxcNnZd-node-group") + .attr("id", "imev8OqzlOAPljR9j-node-group") .style("display", ui.convert.boolToDisplayStyle(state.showNodes)); state.currentGraphParts.nodeGroups = state.currentGraphParts.nodeMainGroup .selectAll("g") @@ -2643,7 +2643,7 @@ edgeLabelData.push(i); } state.currentGraphParts.edgeLabelMainGroup = view.append("g") - .attr("id", "i8EK4UFXUqNxcNnZd-edge-label-group") + .attr("id", "imev8OqzlOAPljR9j-edge-label-group") .style("display", ui.convert.boolToDisplayStyle(state.showEdgeLabels)); state.currentGraphParts.edgeLabelGroups = state.currentGraphParts.edgeLabelMainGroup .selectAll("g") @@ -2655,7 +2655,7 @@ nodeLabelData.push(i); } state.currentGraphParts.nodeLabelMainGroup = view.append("g") - .attr("id", "i8EK4UFXUqNxcNnZd-node-label-group"); + .attr("id", "imev8OqzlOAPljR9j-node-label-group"); state.currentGraphParts.nodeLabelGroups = state.currentGraphParts.nodeLabelMainGroup .style("display", ui.convert.boolToDisplayStyle(state.showNodeLabels)) .selectAll("g") @@ -2672,7 +2672,7 @@ } } state.currentGraphParts.nodeImageMainGroup = view.append("g") - .attr("id", "i8EK4UFXUqNxcNnZd-node-image-group") + .attr("id", "imev8OqzlOAPljR9j-node-image-group") .style("display", ui.convert.boolToDisplayStyle(state.showNodeImages)); state.currentGraphParts.nodeImageGroups = state.currentGraphParts.nodeImageMainGroup .selectAll("g") @@ -2903,7 +2903,7 @@ function nodeClicked(event, node){ let htmlText = "
Node: " + String(node.id) + "
"; if(typeof(node.click) !== "undefined" && node.click !== ""){ - htmlText += '
' + node.click + '
'; + htmlText += '
' + node.click + '
'; } ui.elements.detailsBody.innerHTML = htmlText; } @@ -3156,7 +3156,7 @@ // 1) Remove existing elements ui.composites.graph.removeEdges(); - svg.select("#i8EK4UFXUqNxcNnZd-arrow-marker").remove(); + svg.select("#imev8OqzlOAPljR9j-arrow-marker").remove(); // 2) Create new elements // - Paths state.currentGraphParts.edgePaths = edgeGroups.append("path") @@ -3170,7 +3170,7 @@ const arrowSizeDouble = arrowSize * 2.0, arrowSizeHalf = arrowSize / 2.0; const marker = svg.append("marker") - .attr("id", "i8EK4UFXUqNxcNnZd-arrow-marker") + .attr("id", "imev8OqzlOAPljR9j-arrow-marker") .attr("markerUnits", "userSpaceOnUse") .attr("viewBox", "0 0 " + arrowSizeDouble + " " + arrowSizeDouble) .attr("refX", 0) @@ -3183,7 +3183,7 @@ .attr("d", "M 0 0 L " + arrowSizeDouble + " " + arrowSizeHalf + " L 0 " + arrowSize + " z") .attr("fill", state.shownData.general.arrow_color) state.currentGraphParts.edgePaths - .attr("marker-end", "url(#i8EK4UFXUqNxcNnZd-arrow-marker)"); + .attr("marker-end", "url(#imev8OqzlOAPljR9j-arrow-marker)"); } // - Edge hover behavior if(state.parsedData.general.contains_edge_hover){ @@ -3217,7 +3217,7 @@ function edgeClicked(event, edge){ let htmlText = "
Edge: " + String(edge.id) + "
"; if(typeof(edge.click) !== "undefined" && edge.click !== ""){ - htmlText += '
' + edge.click + '
'; + htmlText += '
' + edge.click + '
'; } ui.elements.detailsBody.innerHTML = htmlText; } diff --git a/docs/source/index.rst b/docs/source/index.rst index 7c6e5926..d64bbe4d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,7 @@ Gene Normalizer |version| :alt: citation :target: https://zenodo.org/badge/latestdoi/309797998 -The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from `NCBI Gene `_, `Ensembl `_, and `HGNC `_, it designates a `CURIE `_, and provides additional metadata like current and previously-used symbols, aliases, database cross-references and associations, and coordinates. +The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from `NCBI Gene `_, `Ensembl `_, and `HGNC `_, it designates a `CURIE `_, and provides additional metadata like current and previously-used symbols, aliases, database cross-references, and coordinates. A `public REST instance of the service `_ is available for programmatic queries: diff --git a/docs/source/managing_data/index.rst b/docs/source/managing_data/index.rst index a340b645..feaf7055 100644 --- a/docs/source/managing_data/index.rst +++ b/docs/source/managing_data/index.rst @@ -1,11 +1,10 @@ Managing data ============= -The current iteration of the Gene Normalizer stores millions of symbols, names, cross-references, and other associations. This section describes how to load and refresh data, and details usage of specific backend storage implementations. +The current iteration of the Gene Normalizer stores millions of symbols, names, cross-references, and other information. This section describes how to load and refresh data, and details usage of specific backend storage implementations. .. toctree:: Loading and updating data DynamoDB storage backend PostgreSQL storage backend - diff --git a/docs/source/normalizing_data/sources.rst b/docs/source/normalizing_data/sources.rst index 591e582c..0de9cf57 100644 --- a/docs/source/normalizing_data/sources.rst +++ b/docs/source/normalizing_data/sources.rst @@ -33,9 +33,7 @@ HGNC "previous_symbols": [], "xrefs": [ "ensembl:ENSG00000157764", - "ncbigene:673" - ], - "associated_with": [ + "ncbigene:673", "uniprot:P15056", "pubmed:2284096", "omim:164757", @@ -99,7 +97,6 @@ Ensembl "xrefs": [ "hgnc:1097" ], - "associated_with": [], "gene_type": "protein_coding", "match_type": 100 } @@ -143,9 +140,7 @@ The `NCBI Gene Database `_ is a service prov "previous_symbols": [], "xrefs": [ "ensembl:ENSG00000157764", - "hgnc:1097" - ], - "associated_with": [ + "hgnc:1097", "omim:164757" ], "gene_type": "protein-coding", diff --git a/src/gene/database/dynamodb.py b/src/gene/database/dynamodb.py index 6f7b0ee7..629059c8 100644 --- a/src/gene/database/dynamodb.py +++ b/src/gene/database/dynamodb.py @@ -434,8 +434,7 @@ def _add_ref_record( :param str term: referent term :param str concept_id: concept ID to refer to - :param str ref_type: one of {'alias', 'label', 'xref', - 'associated_with'} + :param str ref_type: one of {'alias', 'label', 'xref'} :param src_name: name of source for record """ label_and_type = f"{term.lower()}##{ref_type}" diff --git a/src/gene/database/postgresql.py b/src/gene/database/postgresql.py index 6638645c..66a43132 100644 --- a/src/gene/database/postgresql.py +++ b/src/gene/database/postgresql.py @@ -97,7 +97,6 @@ def list_tables(self) -> List[str]: _drop_db_query = b""" DROP MATERIALIZED VIEW IF EXISTS record_lookup_view; DROP TABLE IF EXISTS - gene_associations, gene_symbols, gene_previous_symbols, gene_aliases, @@ -324,12 +323,11 @@ def _format_source_record(self, source_row: Tuple) -> Dict: "locations": source_row[5], "gene_type": source_row[6], "aliases": source_row[7], - "associated_with": source_row[8], - "previous_symbols": source_row[9], - "symbol": source_row[10], - "xrefs": source_row[11], - "src_name": source_row[12], - "merge_ref": source_row[13], + "previous_symbols": source_row[8], + "symbol": source_row[9], + "xrefs": source_row[10], + "src_name": source_row[11], + "merge_ref": source_row[12], "item_type": RecordType.IDENTITY.value, } return {k: v for k, v in gene_record.items() if v} @@ -373,8 +371,7 @@ def _format_merged_record(self, merged_row: Tuple) -> Dict: "hgnc_locus_type": merged_row[11], "ncbi_gene_type": merged_row[12], "aliases": merged_row[13], - "associated_with": merged_row[14], - "xrefs": merged_row[15], + "xrefs": merged_row[14], "item_type": RecordType.MERGER.value, } return {k: v for k, v in merged_record.items() if v} @@ -421,7 +418,6 @@ def get_record_by_id( RefType.PREVIOUS_SYMBOLS: b"SELECT concept_id FROM gene_previous_symbols WHERE lower(prev_symbol) = %s;", # noqa: E501 RefType.ALIASES: b"SELECT concept_id FROM gene_aliases WHERE lower(alias) = %s;", # noqa: E501 RefType.XREFS: b"SELECT concept_id FROM gene_xrefs WHERE lower(xref) = %s;", - RefType.ASSOCIATED_WITH: b"SELECT concept_id FROM gene_associations WHERE lower(associated_with) = %s;", # noqa: E501 } def get_refs_by_type(self, search_term: str, ref_type: RefType) -> List[str]: @@ -558,9 +554,6 @@ def add_source_metadata(self, src_name: SourceName, meta: SourceMeta) -> None: ) _ins_alias_query = b"INSERT INTO gene_aliases (alias, concept_id) VALUES (%s, %s);" _ins_xref_query = b"INSERT INTO gene_xrefs (xref, concept_id) VALUES (%s, %s);" - _ins_assoc_query = ( - b"INSERT INTO gene_associations (associated_with, concept_id) VALUES (%s, %s);" - ) def add_record(self, record: Dict, src_name: SourceName) -> None: """Add new record to database. @@ -591,8 +584,6 @@ def add_record(self, record: Dict, src_name: SourceName) -> None: cur.execute(self._ins_alias_query, [a, concept_id]) for x in record.get("xrefs", []): cur.execute(self._ins_xref_query, [x, concept_id]) - for a in record.get("associated_with", []): - cur.execute(self._ins_assoc_query, [a, concept_id]) for p in record.get("previous_symbols", []): cur.execute(self._ins_prev_symbol_query, [p, concept_id]) if record.get("symbol"): @@ -606,10 +597,9 @@ def add_record(self, record: Dict, src_name: SourceName) -> None: INSERT INTO gene_merged ( concept_id, symbol, symbol_status, previous_symbols, label, strand, location_annotations, ensembl_locations, hgnc_locations, ncbi_locations, - hgnc_locus_type, ensembl_biotype, ncbi_gene_type, aliases, associated_with, - xrefs + hgnc_locus_type, ensembl_biotype, ncbi_gene_type, aliases, xrefs ) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); """ def add_merged_record(self, record: Dict) -> None: @@ -644,7 +634,6 @@ def add_merged_record(self, record: Dict) -> None: record.get("ensembl_biotype"), record.get("ncbi_gene_type"), record.get("aliases"), - record.get("associated_with"), record.get("xrefs"), ], ) @@ -702,13 +691,6 @@ def delete_normalized_concepts(self) -> None: WHERE gc.source = %s ); """ - _drop_associations_query = b""" - DELETE FROM gene_associations WHERE id IN ( - SELECT ga.id FROM gene_associations ga LEFT JOIN gene_concepts gc - ON gc.concept_id = ga.concept_id - WHERE gc.source = %s - ); - """ _drop_prev_symbols_query = b""" DELETE FROM gene_previous_symbols WHERE id IN ( SELECT gps.id FROM gene_previous_symbols gps LEFT JOIN gene_concepts gc @@ -750,7 +732,6 @@ def delete_source(self, src_name: SourceName) -> None: """ with self.conn.cursor() as cur: cur.execute(self._drop_aliases_query, [src_name.value]) - cur.execute(self._drop_associations_query, [src_name.value]) cur.execute(self._drop_prev_symbols_query, [src_name.value]) cur.execute(self._drop_symbols_query, [src_name.value]) cur.execute(self._drop_xrefs_query, [src_name.value]) diff --git a/src/gene/database/postgresql/add_fkeys.sql b/src/gene/database/postgresql/add_fkeys.sql index f93459b3..28e1a88f 100644 --- a/src/gene/database/postgresql/add_fkeys.sql +++ b/src/gene/database/postgresql/add_fkeys.sql @@ -1,7 +1,5 @@ ALTER TABLE gene_aliases ADD CONSTRAINT gene_aliases_concept_id_fkey FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id); -ALTER TABLE gene_associations ADD CONSTRAINT gene_associations_concept_id_fkey - FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id); ALTER TABLE gene_previous_symbols ADD CONSTRAINT gene_previous_symbols_concept_id_fkey FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id); diff --git a/src/gene/database/postgresql/add_indexes.sql b/src/gene/database/postgresql/add_indexes.sql index b96df534..805ad71b 100644 --- a/src/gene/database/postgresql/add_indexes.sql +++ b/src/gene/database/postgresql/add_indexes.sql @@ -7,7 +7,5 @@ CREATE INDEX idx_gps_symbol_low ON gene_previous_symbols (lower(prev_symbol)); CREATE INDEX idx_ga_alias_low ON gene_aliases (lower(alias)); CREATE INDEX idx_gx_xref_low ON gene_xrefs (lower(xref)); -CREATE INDEX idx_g_as_association_low - ON gene_associations (lower(associated_with)); CREATE INDEX idx_rlv_concept_id_low ON record_lookup_view (lower(concept_id)); diff --git a/src/gene/database/postgresql/create_record_lookup_view.sql b/src/gene/database/postgresql/create_record_lookup_view.sql index 1e33977f..7474a07a 100644 --- a/src/gene/database/postgresql/create_record_lookup_view.sql +++ b/src/gene/database/postgresql/create_record_lookup_view.sql @@ -7,7 +7,6 @@ SELECT gc.concept_id, gc.locations, gc.gene_type, ga.aliases, - gas.associated_with, gps.previous_symbols, gs.symbol, gx.xrefs, @@ -20,11 +19,6 @@ FULL JOIN ( FROM gene_aliases ga_1 GROUP BY ga_1.concept_id ) ga ON gc.concept_id::text = ga.concept_id::text -FULL JOIN ( - SELECT gas_1.concept_id, array_agg(gas_1.associated_with) AS associated_with - FROM gene_associations gas_1 - GROUP BY gas_1.concept_id -) gas ON gc.concept_id::text = gas.concept_id::text FULL JOIN ( SELECT gps_1.concept_id, array_agg(gps_1.prev_symbol) AS previous_symbols FROM gene_previous_symbols gps_1 diff --git a/src/gene/database/postgresql/create_tables.sql b/src/gene/database/postgresql/create_tables.sql index 83198199..9100e553 100644 --- a/src/gene/database/postgresql/create_tables.sql +++ b/src/gene/database/postgresql/create_tables.sql @@ -26,7 +26,6 @@ CREATE TABLE gene_merged ( hgnc_locus_type TEXT [], ncbi_gene_type TEXT [], aliases TEXT [], - associated_with TEXT [], xrefs TEXT [] ); CREATE TABLE gene_concepts ( @@ -60,8 +59,3 @@ CREATE TABLE gene_xrefs ( xref TEXT NOT NULL, concept_id VARCHAR(127) NOT NULL REFERENCES gene_concepts (concept_id) ); -CREATE TABLE gene_associations ( - id SERIAL PRIMARY KEY, - associated_with TEXT NOT NULL, - concept_ID VARCHAR(127) NOT NULL REFERENCES gene_concepts (concept_id) -); diff --git a/src/gene/database/postgresql/delete_normalized_concepts.sql b/src/gene/database/postgresql/delete_normalized_concepts.sql index 5141c841..e5e1bdce 100644 --- a/src/gene/database/postgresql/delete_normalized_concepts.sql +++ b/src/gene/database/postgresql/delete_normalized_concepts.sql @@ -19,7 +19,6 @@ CREATE TABLE gene_merged ( hgnc_locus_type TEXT [], ncbi_gene_type TEXT [], aliases TEXT [], - associated_with TEXT [], xrefs TEXT [] ); ALTER TABLE gene_concepts ADD CONSTRAINT gene_concepts_merge_ref_fkey diff --git a/src/gene/database/postgresql/drop_fkeys.sql b/src/gene/database/postgresql/drop_fkeys.sql index f804ca1e..ba2aeef5 100644 --- a/src/gene/database/postgresql/drop_fkeys.sql +++ b/src/gene/database/postgresql/drop_fkeys.sql @@ -1,5 +1,4 @@ ALTER TABLE gene_aliases DROP CONSTRAINT gene_aliases_concept_id_fkey; -ALTER TABLE gene_associations DROP CONSTRAINT gene_associations_concept_id_fkey; ALTER TABLE gene_previous_symbols DROP CONSTRAINT gene_previous_symbols_concept_id_fkey; ALTER TABLE gene_symbols DROP CONSTRAINT gene_symbols_concept_id_fkey; diff --git a/src/gene/database/postgresql/drop_indexes.sql b/src/gene/database/postgresql/drop_indexes.sql index 7c9743d0..dd9156dc 100644 --- a/src/gene/database/postgresql/drop_indexes.sql +++ b/src/gene/database/postgresql/drop_indexes.sql @@ -4,5 +4,4 @@ DROP INDEX IF EXISTS idx_gs_symbol_low; DROP INDEX IF EXISTS idx_gps_symbol_low; DROP INDEX IF EXISTS idx_gx_xref_low; DROP INDEX IF EXISTS idx_ga_alias_low; -DROP INDEX IF EXISTS idx_g_as_association_low; DROP INDEX IF EXISTS idx_rlv_concept_id_low; diff --git a/src/gene/etl/ensembl.py b/src/gene/etl/ensembl.py index bb590047..74ac8f4b 100644 --- a/src/gene/etl/ensembl.py +++ b/src/gene/etl/ensembl.py @@ -1,7 +1,7 @@ """Defines the Ensembl ETL methods.""" import logging import re -from typing import Dict +from typing import Dict, Optional import gffutils from gffutils.feature import Feature @@ -60,8 +60,7 @@ def _transform_data(self) -> None: f_id = f.attributes.get("ID")[0].split(":")[0] if f_id == "gene": gene = self._add_gene(f, accession_numbers) - if gene: - self._load_gene(gene) + self._load_gene(gene) _logger.info("Ensembl data transform complete.") def _add_gene(self, f: Feature, accession_numbers: Dict) -> Dict: @@ -90,73 +89,46 @@ def _add_gene(self, f: Feature, accession_numbers: Dict) -> Dict: return gene_params def _add_attributes(self, f: Feature, gene: Dict) -> None: - """Add concept_id, symbol, xrefs, and associated_with to a gene record. + """Add concept_id, symbol, and xrefs to a gene record. :param f: A gene from the data :param gene: A transformed gene record """ - attributes = { - "ID": "concept_id", - "Name": "symbol", - "description": "xrefs", - "biotype": "gene_type", - } - - for attribute in f.attributes.items(): - key = attribute[0] - - if key in attributes.keys(): - val = attribute[1] - - if len(val) == 1: - val = val[0] - if key == "ID": - if val.startswith("gene"): - val = ( - f"{NamespacePrefix.ENSEMBL.value}:" - f"{val.split(':')[1]}" - ) - - if key == "description": - gene["label"] = val.split("[")[0].strip() - if "Source:" in val: - src_name = ( - val.split("[")[-1] - .split("Source:")[-1] - .split("Acc")[0] - .split(";")[0] - ) - src_id = val.split("Acc:")[-1].split("]")[0] - if ":" in src_id: - src_id = src_id.split(":")[-1] - source = self._get_xref_associated_with(src_name, src_id) - if "xrefs" in source: - gene["xrefs"] = source["xrefs"] - elif "associated_with" in source: - gene["associated_with"] = source["associated_with"] - continue - - gene[attributes[key]] = val - - def _get_xref_associated_with(self, src_name: str, src_id: str) -> Dict: - """Get xref or associated_with concept. + for key, value in f.attributes.items(): + if key == "ID" and value[0].startswith("gene"): + gene[ + "concept_id" + ] = f"{NamespacePrefix.ENSEMBL.value}:{value[0].split(':')[1]}" + elif key == "description": + pattern = "^(.*) \\[Source:([^\\s]*)?( .*)?;Acc:(.*:)?(.*)?\\]$" + matches = re.findall(pattern, value[0]) + if matches: + gene["label"] = matches[0][0] + if matches[0][1] and matches[0][4]: + gene["xrefs"] = [self._get_xref(matches[0][1], matches[0][4])] + elif key == "Name": + gene["symbol"] = value[0] + elif key == "biotype": + gene["gene_type"] = value[0] + + def _get_xref(self, src_name: str, src_id: str) -> Optional[str]: + """Get xref. :param src_name: Source name :param src_id: The source's accession number - :return: A dict containing an other identifier or xref + :return: xref, if successfully parsed """ - source = dict() - if src_name.startswith("HGNC"): - source["xrefs"] = [f"{NamespacePrefix.HGNC.value}:{src_id}"] - elif src_name.startswith("NCBI"): - source["xrefs"] = [f"{NamespacePrefix.NCBI.value}:{src_id}"] - elif src_name.startswith("UniProt"): - source["associated_with"] = [f"{NamespacePrefix.UNIPROT.value}:{src_id}"] - elif src_name.startswith("miRBase"): - source["associated_with"] = [f"{NamespacePrefix.MIRBASE.value}:{src_id}"] - elif src_name.startswith("RFAM"): - source["associated_with"] = [f"{NamespacePrefix.RFAM.value}:{src_id}"] - return source + for prefix, constrained_prefix in ( + ("HGNC", NamespacePrefix.HGNC), + ("NCBI", NamespacePrefix.NCBI), + ("UniProt", NamespacePrefix.UNIPROT), + ("miRBase", NamespacePrefix.MIRBASE), + ("RFAM", NamespacePrefix.RFAM), + ): + if src_name.startswith(prefix): + return f"{constrained_prefix.value}:{src_id}" + _logger.warning("Unrecognized source name: %:%", src_name, src_id) + return None def _add_meta(self) -> None: """Add Ensembl metadata. diff --git a/src/gene/etl/hgnc.py b/src/gene/etl/hgnc.py index 1f060935..805fbe37 100644 --- a/src/gene/etl/hgnc.py +++ b/src/gene/etl/hgnc.py @@ -6,7 +6,6 @@ from gene.etl.base import Base, GeneNormalizerEtlError from gene.schemas import ( - PREFIX_LOOKUP, Annotation, Chromosome, DataLicenseAttributes, @@ -42,9 +41,9 @@ def _transform_data(self) -> None: elif r["status"] == "Entry Withdrawn": gene["symbol_status"] = SymbolStatus.WITHDRAWN.value - # store alias, xref, associated_with, prev_symbols, location + # store alias, xref, prev_symbols, location self._get_aliases(r, gene) - self._get_xrefs_associated_with(r, gene) + self._get_xrefs(r, gene) if "prev_symbol" in r: self._get_previous_symbols(r, gene) if "location" in r: @@ -81,14 +80,13 @@ def _get_previous_symbols(self, r: Dict, gene: Dict) -> None: if prev_symbols: gene["previous_symbols"] = list(set(prev_symbols)) - def _get_xrefs_associated_with(self, record: Dict, gene: Dict) -> None: - """Store xrefs and/or associated_with refs in a gene record. + def _get_xrefs(self, record: Dict, gene: Dict) -> None: + """Store xrefs in a gene record. :param record: A gene record in the HGNC data file :param gene: A transformed gene record """ xrefs = list() - associated_with = list() sources = [ "entrez_id", "ensembl_gene_id", @@ -128,37 +126,28 @@ def _get_xrefs_associated_with(self, record: Dict, gene: Dict) -> None: key = src if key.upper() in NamespacePrefix.__members__: - if NamespacePrefix[key.upper()].value in PREFIX_LOOKUP.keys(): - self._get_xref_associated_with(key, src, record, xrefs) - else: - self._get_xref_associated_with( - key, src, record, associated_with - ) + self._get_xref(key, src, record, xrefs) else: _logger.warning(f"{key} not in schemas.py") if xrefs: gene["xrefs"] = xrefs - if associated_with: - gene["associated_with"] = associated_with - def _get_xref_associated_with( - self, key: str, src: str, r: Dict, src_type: List[str] - ) -> None: - """Add an xref or associated_with ref to a gene record. + def _get_xref(self, key: str, src: str, r: Dict, xrefs: List[str]) -> None: + """Add an xref to a gene record. :param key: The source's name :param src: HGNC's source field :param r: A gene record in the HGNC data file - :param src_type: Either xrefs or associated_with list + :param xrefs: xrefs list """ if isinstance(r[src], list): for xref in r[src]: - src_type.append(f"{NamespacePrefix[key.upper()].value}:{xref}") + xrefs.append(f"{NamespacePrefix[key.upper()].value}:{xref}") else: if isinstance(r[src], str) and ":" in r[src]: r[src] = r[src].split(":")[-1].strip() - src_type.append(f"{NamespacePrefix[key.upper()].value}" f":{r[src]}") + xrefs.append(f"{NamespacePrefix[key.upper()].value}" f":{r[src]}") def _get_location(self, r: Dict, gene: Dict) -> None: """Store GA4GH VRS ChromosomeLocation in a gene record. diff --git a/src/gene/etl/merge.py b/src/gene/etl/merge.py index d065be73..f375a32a 100644 --- a/src/gene/etl/merge.py +++ b/src/gene/etl/merge.py @@ -5,7 +5,7 @@ from gene.database import AbstractDatabase from gene.database.database import DatabaseWriteError -from gene.schemas import GeneTypeFieldName, RecordType, SourcePriority +from gene.schemas import GeneTypeFieldName, NamespacePrefix, RecordType, SourcePriority _logger = logging.getLogger(__name__) @@ -98,7 +98,14 @@ def _create_record_id_set( if not record_xrefs: return observed_id_set | {db_record["concept_id"]} else: - local_id_set = set(record_xrefs) + local_id_set = set() + for xref in record_xrefs: + if ( + xref.startswith(NamespacePrefix.NCBI.value) + or xref.startswith(NamespacePrefix.ENSEMBL.value) + or xref.startswith(NamespacePrefix.HGNC.value) + ): + local_id_set.add(xref) merged_id_set = {record_id} | observed_id_set for local_record_id in local_id_set - observed_id_set: merged_id_set |= self._create_record_id_set( @@ -145,19 +152,22 @@ def record_order(record: Dict) -> Tuple: merged_attrs = { "concept_id": records[0]["concept_id"], "aliases": set(), - "associated_with": set(), "previous_symbols": set(), "hgnc_locus_type": set(), "ncbi_gene_type": set(), "ensembl_biotype": set(), "strand": set(), } - if len(records) > 1: - merged_attrs["xrefs"] = list({r["concept_id"] for r in records[1:]}) # merge from constituent records - set_fields = ["aliases", "associated_with", "previous_symbols", "strand"] - scalar_fields = ["symbol", "symbol_status", "label", "location_annotations"] + set_fields = ["aliases", "previous_symbols", "strand"] + scalar_fields = [ + "symbol", + "symbol_status", + "label", + "location_annotations", + "xrefs", + ] for record in records: for field in set_fields: merged_attrs[field] |= set(record.get(field, set())) diff --git a/src/gene/etl/ncbi.py b/src/gene/etl/ncbi.py index 427d57e6..a5954bbe 100644 --- a/src/gene/etl/ncbi.py +++ b/src/gene/etl/ncbi.py @@ -98,14 +98,13 @@ def _get_prev_symbols(self) -> Dict[str, str]: history_file.close() return prev_symbols - def _add_xrefs_associated_with(self, val: List[str], params: Dict) -> None: - """Add xrefs and associated_with refs to a transformed gene. + def _add_xrefs(self, val: List[str], params: Dict) -> None: + """Add xrefs to a transformed gene. :param val: A list of source ids for a given gene :param params: A transformed gene record """ params["xrefs"] = [] - params["associated_with"] = [] for src in val: src_name = src.split(":")[0].upper() src_id = src.split(":")[-1] @@ -125,16 +124,12 @@ def _add_xrefs_associated_with(self, val: List[str], params: Dict) -> None: prefix = NamespacePrefix.IMGT_GENE_DB.value elif src_name.startswith("MIRBASE"): prefix = NamespacePrefix.MIRBASE.value - else: - prefix = None - if prefix: - params["associated_with"].append(f"{prefix}:{src_id}") else: _logger.info(f"{src_name} is not in NameSpacePrefix.") + continue + params["xrefs"].append(f"{prefix}:{src_id}") if not params["xrefs"]: del params["xrefs"] - if not params["associated_with"]: - del params["associated_with"] def _get_gene_info(self, prev_symbols: Dict[str, str]) -> Dict[str, str]: """Store genes from NCBI info file. @@ -158,10 +153,10 @@ def _get_gene_info(self, prev_symbols: Dict[str, str]) -> Dict[str, str]: params["aliases"] = row[4].split("|") else: params["aliases"] = [] - # get associated_with + # get xrefs if row[5] != "-": - associated_with = row[5].split("|") - self._add_xrefs_associated_with(associated_with, params) + xrefs = row[5].split("|") + self._add_xrefs(xrefs, params) # get chromosome location vrs_chr_location = self._get_vrs_chr_location(row, params) if "exclude" in vrs_chr_location: @@ -223,7 +218,7 @@ def _add_gff_gene( return params def _add_attributes(self, f: gffutils.feature.Feature, gene: Dict) -> None: - """Add concept_id, symbol, and xrefs/associated_with to a gene record. + """Add concept_id, symbol, and xrefs to a gene record. :param gffutils.feature.Feature f: A gene from the data :param gene: A transformed gene record @@ -239,7 +234,7 @@ def _add_attributes(self, f: gffutils.feature.Feature, gene: Dict) -> None: val = val[0] if key == "Dbxref": - self._add_xrefs_associated_with(val, gene) + self._add_xrefs(val, gene) elif key == "Name": gene["symbol"] = val @@ -258,25 +253,24 @@ def _get_vrs_sq_location( params["strand"] = gene.strand return self._build_sequence_location(gene.seqid, gene, params["concept_id"]) - def _get_xref_associated_with(self, src_name: str, src_id: str) -> Dict: - """Get xref or associated_with ref. + def _get_xref(self, src_name: str, src_id: str) -> Dict: + """Get xref. :param src_name: Source name :param src_id: The source's accession number - :return: A dict containing an xref or associated_with ref + :return: A dict containing an xref """ - source = dict() - if src_name.startswith("HGNC"): - source["xrefs"] = [f"{NamespacePrefix.HGNC.value}:{src_id}"] - elif src_name.startswith("NCBI"): - source["xrefs"] = [f"{NamespacePrefix.NCBI.value}:{src_id}"] - elif src_name.startswith("UniProt"): - source["associated_with"] = [f"{NamespacePrefix.UNIPROT.value}:{src_id}"] - elif src_name.startswith("miRBase"): - source["associated_with"] = [f"{NamespacePrefix.MIRBASE.value}:{src_id}"] - elif src_name.startswith("RFAM"): - source["associated_with"] = [f"{NamespacePrefix.RFAM.value}:{src_id}"] - return source + for prefix, constrained_prefix in ( + ("HGNC", NamespacePrefix.HGNC), + ("NCBI", NamespacePrefix.NCBI), # ? + ("UniProt", NamespacePrefix.UNIPROT), + ("miRBase", NamespacePrefix.MIRBASE), + ("RFAM", NamespacePrefix.RFAM), + ): + if src_name.startswith(prefix): + return {"xrefs": [f"{constrained_prefix.value}:{src_id}"]} + _logger.warning("Unrecognized source name: %:%", src_name, src_id) + return {} def _get_vrs_chr_location(self, row: List[str], params: Dict) -> List: """Store GA4GH VRS ChromosomeLocation in a gene record. diff --git a/src/gene/query.py b/src/gene/query.py index 8c100446..0a57be43 100644 --- a/src/gene/query.py +++ b/src/gene/query.py @@ -375,9 +375,8 @@ def _add_gene( ) # mappings - source_ids = record.get("xrefs", []) + record.get("associated_with", []) mappings = [] - for source_id in source_ids: + for source_id in record.get("xrefs", []): system, code = source_id.split(":") mappings.append( core_models.Mapping( diff --git a/src/gene/schemas.py b/src/gene/schemas.py index d2c17d84..96b6fc4b 100644 --- a/src/gene/schemas.py +++ b/src/gene/schemas.py @@ -58,7 +58,6 @@ class MatchType(IntEnum): PREV_SYMBOL = 80 ALIAS = 60 XREF = 60 - ASSOCIATED_WITH = 60 FUZZY_MATCH = 20 NO_MATCH = 0 @@ -102,7 +101,6 @@ class BaseGene(BaseModel): aliases: List[StrictStr] = [] previous_symbols: List[StrictStr] = [] xrefs: List[CURIE] = [] - associated_with: List[CURIE] = [] gene_type: Optional[StrictStr] = None @@ -242,7 +240,6 @@ class RefType(str, Enum): PREVIOUS_SYMBOLS = "prev_symbol" ALIASES = "alias" XREFS = "xref" - ASSOCIATED_WITH = "associated_with" # collective name to singular name, e.g. {"previous_symbols": "prev_symbol"} @@ -561,8 +558,9 @@ class UnmergedNormalizationService(BaseNormalizationService): ], "aliases": ["3.1.1.7"], "previous_symbols": ["YT"], - "xrefs": ["ncbigene:43", "ensembl:ENSG00000087085"], - "associated_with": [ + "xrefs": [ + "ncbigene:43", + "ensembl:ENSG00000087085", "ucsc:uc003uxi.4", "vega:OTTHUMG00000157033", "merops:S09.979", @@ -671,8 +669,6 @@ class UnmergedNormalizationService(BaseNormalizationService): "xrefs": [ "hgnc:108", "ensembl:ENSG00000087085", - ], - "associated_with": [ "omim:100740", ], "gene_type": "protein-coding", diff --git a/tests/conftest.py b/tests/conftest.py index ad1a14a2..923d71ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,7 +41,6 @@ def _compare_records(normalized_gene, test_gene, match_type): assert set(normalized_gene.xrefs) == set(test_gene.xrefs) assert normalized_gene.symbol_status == test_gene.symbol_status assert set(normalized_gene.previous_symbols) == set(test_gene.previous_symbols) - assert set(normalized_gene.associated_with) == set(test_gene.associated_with) assert normalized_gene.symbol == test_gene.symbol assert len(normalized_gene.locations) == len(test_gene.locations) for loc in normalized_gene.locations: diff --git a/tests/unit/test_database_and_etl.py b/tests/unit/test_database_and_etl.py index 092cc6c3..62a4154e 100644 --- a/tests/unit/test_database_and_etl.py +++ b/tests/unit/test_database_and_etl.py @@ -76,7 +76,6 @@ def test_tables_created(db_fixture): existing_tables = db_fixture.db.list_tables() if db_fixture.db_name == "PostgresDatabase": assert set(existing_tables) == { - "gene_associations", "gene_symbols", "gene_previous_symbols", "gene_aliases", @@ -150,11 +149,6 @@ def test_item_type(db_fixture): assert "item_type" in item assert item["item_type"] == "alias" - filter_exp = Key("label_and_type").eq("omim:606689##associated_with") - item = db_fixture.db.genes.query(KeyConditionExpression=filter_exp)["Items"][0] - assert "item_type" in item - assert item["item_type"] == "associated_with" - filter_exp = Key("label_and_type").eq("ensembl:ensg00000268895##xref") item = db_fixture.db.genes.query(KeyConditionExpression=filter_exp)["Items"][0] assert "item_type" in item diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py index 1ab55430..e9980579 100644 --- a/tests/unit/test_ensembl_source.py +++ b/tests/unit/test_ensembl_source.py @@ -47,7 +47,6 @@ def ddx11l1(): } ], "strand": "+", - "associated_with": [], "gene_type": "transcribed_unprocessed_pseudogene", } return Gene(**params) @@ -79,7 +78,6 @@ def tp53(): } ], "strand": "-", - "associated_with": [], "gene_type": "protein_coding", } return Gene(**params) @@ -111,7 +109,6 @@ def ATP6AP1_DT(): # noqa: N802 } ], "strand": "-", - "associated_with": [], "gene_type": "lncRNA", } return Gene(**params) @@ -127,7 +124,6 @@ def hsa_mir_1253(): "label": "hsa-mir-1253", "previous_symbols": [], "aliases": [], - "xrefs": [], "symbol_status": None, "location_annotations": [], "locations": [ @@ -143,7 +139,7 @@ def hsa_mir_1253(): } ], "strand": "+", - "associated_with": ["mirbase:MI0006387"], + "xrefs": ["mirbase:MI0006387"], "gene_type": "lncRNA", } return Gene(**params) @@ -175,7 +171,6 @@ def spry3(): } ], "strand": "+", - "associated_with": [], "gene_type": "protein_coding", } return Gene(**params) @@ -254,9 +249,9 @@ def test_hsa_mir_1253(check_resp_single_record, ensembl, hsa_mir_1253): resp = ensembl.search("hsa-mir-1253") check_resp_single_record(resp, hsa_mir_1253, MatchType.SYMBOL) - # associated_with + # xref resp = ensembl.search("mirbase:MI0006387") - check_resp_single_record(resp, hsa_mir_1253, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, hsa_mir_1253, MatchType.XREF) def test_spry3(check_resp_single_record, ensembl, spry3): diff --git a/tests/unit/test_hgnc_source.py b/tests/unit/test_hgnc_source.py index 185809fe..2d35c028 100644 --- a/tests/unit/test_hgnc_source.py +++ b/tests/unit/test_hgnc_source.py @@ -49,7 +49,9 @@ def a1bg_as1(): "previous_symbols": ["NCRNA00181", "A1BGAS", "A1BG-AS"], "aliases": ["FLJ23569"], "symbol_status": "approved", - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000268895", + "ncbigene:503538", "vega:OTTHUMG00000183508", "ucsc:uc002qse.3", "refseq:NR_015380", @@ -57,7 +59,6 @@ def a1bg_as1(): "refseq:NR_015380", "ena.embl:BC040926", ], - "xrefs": ["ensembl:ENSG00000268895", "ncbigene:503538"], "gene_type": "RNA, long non-coding", } return Gene(**params) @@ -86,7 +87,7 @@ def tp53(): "previous_symbols": [], "aliases": ["p53", "LFS1"], "symbol_status": "approved", - "associated_with": [ + "xrefs": [ "vega:OTTHUMG00000162125", "refseq:NM_000546", "cosmic:TP53", @@ -110,8 +111,9 @@ def tp53(): "pubmed:6396087", "pubmed:3456488", "pubmed:2047879", + "ensembl:ENSG00000141510", + "ncbigene:7157", ], - "xrefs": ["ensembl:ENSG00000141510", "ncbigene:7157"], "gene_type": "gene with protein product", } return Gene(**params) @@ -140,8 +142,9 @@ def a3galt2(): "previous_symbols": ["A3GALT2P"], "aliases": ["IGBS3S", "IGB3S"], "symbol_status": "approved", - "xrefs": ["ensembl:ENSG00000184389", "ncbigene:127550"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000184389", + "ncbigene:127550", "vega:OTTHUMG00000004125", "vega:OTTHUMG00000004125", "ucsc:uc031plq.1", @@ -180,8 +183,9 @@ def wdhd1(): "previous_symbols": [], "aliases": ["AND-1", "CTF4", "CHTF4"], "symbol_status": "approved", - "xrefs": ["ensembl:ENSG00000198554", "ncbigene:11169"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000198554", + "ncbigene:11169", "vega:OTTHUMG00000140304", "refseq:NM_007086", "omim:608126", @@ -212,8 +216,12 @@ def g6pr(): "previous_symbols": [], "aliases": ["GSD1aSP"], "symbol_status": "approved", - "xrefs": ["ncbigene:2541"], - "associated_with": ["pubmed:2172641", "pubmed:7814621", "pubmed:2996501"], + "xrefs": [ + "ncbigene:2541", + "pubmed:2172641", + "pubmed:7814621", + "pubmed:2996501", + ], "gene_type": "unknown", } return Gene(**params) @@ -233,8 +241,7 @@ def pirc24(): "previous_symbols": [], "aliases": [], "symbol_status": "approved", - "xrefs": ["ncbigene:100313810"], - "associated_with": ["pubmed:17881367"], + "xrefs": ["ncbigene:100313810", "pubmed:17881367"], "gene_type": "RNA, cluster", } return Gene(**params) @@ -263,8 +270,8 @@ def gage4(): "previous_symbols": [], "aliases": ["CT4.4"], "symbol_status": "approved", - "xrefs": ["ncbigene:2576"], - "associated_with": [ + "xrefs": [ + "ncbigene:2576", "refseq:NM_001474", "omim:300597", "uniprot:P0DSO3", @@ -290,8 +297,9 @@ def mafip(): "previous_symbols": [], "aliases": ["FLJ35473", "FLJ00219", "FLJ39633", "MIP", "pp5644", "TEKT4P4"], "symbol_status": "approved", - "xrefs": ["ensembl:ENSG00000274847", "ncbigene:727764"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000274847", + "ncbigene:727764", "vega:OTTHUMG00000188065", "refseq:NR_046439", "uniprot:Q8WZ33", @@ -319,8 +327,7 @@ def mt_7sdna(): "previous_symbols": ["MT7SDNA"], "aliases": [], "symbol_status": "approved", - "xrefs": [], - "associated_with": ["pubmed:24709344", "pubmed:273237"], + "xrefs": ["pubmed:24709344", "pubmed:273237"], "gene_type": "region", } return Gene(**params) @@ -350,7 +357,6 @@ def cecr(): "aliases": [], "symbol_status": "approved", "xrefs": ["ncbigene:1055"], - "associated_with": [], "gene_type": "region", } return Gene(**params) @@ -387,8 +393,9 @@ def csf2ra(): "previous_symbols": ["CSF2R"], "aliases": ["CD116", "alphaGMR"], "symbol_status": "approved", - "xrefs": ["ensembl:ENSG00000198223", "ncbigene:1438"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000198223", + "ncbigene:1438", "vega:OTTHUMG00000012533", "refseq:NM_001161529", "orphanet:209477", @@ -435,8 +442,7 @@ def rps24p5(): "previous_symbols": [], "aliases": [], "symbol_status": "approved", - "xrefs": ["ncbigene:100271094"], - "associated_with": ["refseq:NG_011274", "pubmed:19123937"], + "xrefs": ["ncbigene:100271094", "refseq:NG_011274", "pubmed:19123937"], "gene_type": "pseudogene", } return Gene(**params) @@ -465,8 +471,7 @@ def trl_cag2_1(): "previous_symbols": ["TRNAL13"], "aliases": ["tRNA-Leu-CAG-2-1"], "symbol_status": "approved", - "xrefs": ["ncbigene:100189130"], - "associated_with": ["ena.embl:HG983896"], + "xrefs": ["ncbigene:100189130", "ena.embl:HG983896"], "gene_type": "RNA, transfer", } return Gene(**params) @@ -495,8 +500,9 @@ def myo5b(): "previous_symbols": [], "aliases": ["KIAA1119"], "symbol_status": "approved", - "xrefs": ["ensembl:ENSG00000167306", "ncbigene:4645"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000167306", + "ncbigene:4645", "vega:OTTHUMG00000179843", "refseq:NM_001080467", "omim:606540", @@ -539,7 +545,7 @@ def gstt1(): "previous_symbols": [], "aliases": ["2.5.1.18"], "symbol_status": "approved", - "associated_with": [ + "xrefs": [ "refseq:NM_000853", "omim:600436", "ucsc:uc002zze.4", @@ -547,8 +553,9 @@ def gstt1(): "orphanet:470418", "ena.embl:KI270879", "pubmed:8617495", + "ensembl:ENSG00000277656", + "ncbigene:2952", ], - "xrefs": ["ensembl:ENSG00000277656", "ncbigene:2952"], "gene_type": "gene with protein product", } return Gene(**params) @@ -772,9 +779,9 @@ def test_myo5b(check_resp_single_record, myo5b, hgnc): resp = hgnc.search("MYO5B") check_resp_single_record(resp, myo5b, MatchType.SYMBOL) - # associated_with + # xref resp = hgnc.search("refseq:NM_001080467") - check_resp_single_record(resp, myo5b, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, myo5b, MatchType.XREF) def test_gstt1(check_resp_single_record, gstt1, hgnc): @@ -787,9 +794,9 @@ def test_gstt1(check_resp_single_record, gstt1, hgnc): resp = hgnc.search("GSTT1") check_resp_single_record(resp, gstt1, MatchType.SYMBOL) - # associated_with + # xref resp = hgnc.search("omim:600436") - check_resp_single_record(resp, gstt1, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, gstt1, MatchType.XREF) def test_no_match(hgnc): diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py index f7b7508c..95504401 100644 --- a/tests/unit/test_ncbi_source.py +++ b/tests/unit/test_ncbi_source.py @@ -22,7 +22,6 @@ def check_ncbi_discontinued_gene(normalizer_response, concept_id, symbol, match_ assert resp.aliases == [] assert resp.previous_symbols == [] assert resp.xrefs == [] - assert resp.associated_with == [] @pytest.fixture(scope="module") @@ -50,9 +49,8 @@ def dpf1(): "concept_id": "ncbigene:8193", "symbol": "DPF1", "aliases": ["BAF45b", "NEUD4", "neuro-d4", "SMARCG1"], - "xrefs": ["hgnc:20225", "ensembl:ENSG00000011332"], "previous_symbols": [], - "associated_with": ["omim:601670"], + "xrefs": ["hgnc:20225", "ensembl:ENSG00000011332", "omim:601670"], "symbol_status": None, "location_annotations": [], "strand": "-", @@ -90,9 +88,8 @@ def pdp1_symbol(): "concept_id": "ncbigene:54704", "symbol": "PDP1", "aliases": ["PDH", "PDP", "PDPC", "PPM2A", "PPM2C"], - "xrefs": ["hgnc:9279", "ensembl:ENSG00000164951"], + "xrefs": ["hgnc:9279", "ensembl:ENSG00000164951", "omim:605993"], "previous_symbols": ["LOC157663", "PPM2C"], - "associated_with": ["omim:605993"], "symbol_status": None, "location_annotations": [], "strand": "+", @@ -130,9 +127,8 @@ def pdp1_alias(): "concept_id": "ncbigene:403313", "symbol": "PLPP6", "aliases": ["PDP1", "PSDP", "PPAPDC2", "bA6J24.6", "LPRP-B", "PA-PSP"], - "xrefs": ["hgnc:23682", "ensembl:ENSG00000205808"], + "xrefs": ["hgnc:23682", "ensembl:ENSG00000205808", "omim:611666"], "previous_symbols": [], - "associated_with": ["omim:611666"], "symbol_status": None, "location_annotations": [], "strand": "+", @@ -171,9 +167,8 @@ def spry3(): "concept_id": "ncbigene:10251", "symbol": "SPRY3", "aliases": ["spry-3"], - "xrefs": ["hgnc:11271", "ensembl:ENSG00000168939"], + "xrefs": ["hgnc:11271", "ensembl:ENSG00000168939", "omim:300531"], "previous_symbols": ["LOC170187", "LOC253479"], - "associated_with": ["omim:300531"], "symbol_status": None, "location_annotations": [], "strand": "+", @@ -232,7 +227,6 @@ def adcp1(): "aliases": [], "xrefs": ["hgnc:229"], "previous_symbols": [], - "associated_with": [], "symbol_status": None, "strand": None, "location_annotations": ["6"], @@ -252,9 +246,8 @@ def afa(): "concept_id": "ncbigene:170", "symbol": "AFA", "aliases": [], - "xrefs": [], "previous_symbols": [], - "associated_with": ["omim:106250"], + "xrefs": ["omim:106250"], "symbol_status": None, "strand": None, "location_annotations": [], @@ -274,9 +267,8 @@ def znf84(): "concept_id": "ncbigene:7637", "symbol": "ZNF84", "aliases": ["HPF2"], - "xrefs": ["hgnc:13159", "ensembl:ENSG00000198040"], + "xrefs": ["hgnc:13159", "ensembl:ENSG00000198040", "omim:618554"], "previous_symbols": ["LOC100287429"], - "associated_with": ["omim:618554"], "symbol_status": None, "location_annotations": ["map from Rosati ref via FISH [AFS]"], "strand": "+", @@ -315,9 +307,14 @@ def slc25a6(): "concept_id": "ncbigene:293", "symbol": "SLC25A6", "aliases": ["AAC3", "ANT", "ANT 2", "ANT 3", "ANT3", "ANT3Y"], - "xrefs": ["hgnc:10992", "ensembl:ENSG00000169100", "ensembl:ENSG00000292334"], + "xrefs": [ + "hgnc:10992", + "ensembl:ENSG00000169100", + "ensembl:ENSG00000292334", + "omim:300151", + "omim:403000", + ], "previous_symbols": ["ANT3Y"], - "associated_with": ["omim:300151", "omim:403000"], "symbol_status": None, "location_annotations": [], "strand": "-", @@ -376,7 +373,6 @@ def loc106783576(): "aliases": [], "xrefs": [], "previous_symbols": [], - "associated_with": [], "symbol_status": None, "location_annotations": [], "strand": None, @@ -405,9 +401,8 @@ def glc1b(): "concept_id": "ncbigene:2722", "symbol": "GLC1B", "aliases": [], - "xrefs": [], "previous_symbols": [], - "associated_with": ["omim:606689"], + "xrefs": ["omim:606689"], "symbol_status": None, "location_annotations": [], "strand": None, @@ -436,9 +431,8 @@ def hdpa(): "concept_id": "ncbigene:50829", "symbol": "HDPA", "aliases": [], - "xrefs": [], "previous_symbols": [], - "associated_with": ["omim:300221"], + "xrefs": ["omim:300221"], "symbol_status": None, "location_annotations": [], "strand": None, @@ -470,7 +464,6 @@ def prkrap1(): "aliases": [], "xrefs": ["hgnc:33447"], "previous_symbols": ["LOC100289695"], - "associated_with": [], "symbol_status": None, "location_annotations": ["alternate reference locus"], "strand": "+", @@ -519,9 +512,8 @@ def mhb(): "concept_id": "ncbigene:619511", "symbol": "MHB", "aliases": [], - "xrefs": [], "previous_symbols": [], - "associated_with": ["omim:255160"], + "xrefs": ["omim:255160"], "symbol_status": None, "location_annotations": [], "strand": None, @@ -550,9 +542,8 @@ def spg37(): "concept_id": "ncbigene:100049159", "symbol": "SPG37", "aliases": [], - "xrefs": [], "previous_symbols": [], - "associated_with": ["omim:611945"], + "xrefs": ["omim:611945"], "symbol_status": None, "location_annotations": [], "strand": None, @@ -607,9 +598,9 @@ def test_dpf1(check_resp_single_record, ncbi, dpf1): resp = ncbi.search("neuro-d4") check_resp_single_record(resp, dpf1, MatchType.ALIAS) - # associated_with + # xref resp = ncbi.search("omim:601670") - check_resp_single_record(resp, dpf1, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, dpf1, MatchType.XREF) # No Match resp = ncbi.search("DPF 1") @@ -751,9 +742,9 @@ def test_glc1b(check_resp_single_record, ncbi, glc1b): resp = ncbi.search("GLC1B") check_resp_single_record(resp, glc1b, MatchType.SYMBOL) - # associated_with + # xref resp = ncbi.search("omim:606689") - check_resp_single_record(resp, glc1b, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, glc1b, MatchType.XREF) def test_hdpa(check_resp_single_record, ncbi, hdpa): @@ -792,9 +783,9 @@ def test_mhb(check_resp_single_record, ncbi, mhb): resp = ncbi.search("MHB") check_resp_single_record(resp, mhb, MatchType.SYMBOL) - # associated_with + # xref resp = ncbi.search("OMIM:255160") - check_resp_single_record(resp, mhb, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, mhb, MatchType.XREF) def test_spg37(check_resp_single_record, ncbi, spg37): @@ -807,9 +798,9 @@ def test_spg37(check_resp_single_record, ncbi, spg37): resp = ncbi.search("SPG37") check_resp_single_record(resp, spg37, MatchType.SYMBOL) - # associated_with + # xref resp = ncbi.search("omim:611945") - check_resp_single_record(resp, spg37, MatchType.ASSOCIATED_WITH) + check_resp_single_record(resp, spg37, MatchType.XREF) def test_discontinued_genes(ncbi): diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index bfb11460..f9b08927 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -702,7 +702,6 @@ def normalize_unmerged_loc_653303(): "aliases": [], "previous_symbols": ["LOC196266", "LOC731196", "LOC654080"], "xrefs": [], - "associated_with": [], "gene_type": "pseudo", } ] @@ -745,8 +744,9 @@ def normalize_unmerged_chaf1a(): "CAF-1", ], "previous_symbols": [], - "xrefs": ["ensembl:ENSG00000167670", "ncbigene:10036"], - "associated_with": [ + "xrefs": [ + "ensembl:ENSG00000167670", + "ncbigene:10036", "vega:OTTHUMG00000181922", "ccds:CCDS32875", "ucsc:uc002mal.4", @@ -784,7 +784,6 @@ def normalize_unmerged_chaf1a(): "aliases": [], "previous_symbols": [], "xrefs": ["hgnc:1910"], - "associated_with": [], "gene_type": "protein_coding", } ], @@ -820,8 +819,11 @@ def normalize_unmerged_chaf1a(): ], "aliases": ["CAF1P150", "P150", "CAF1", "CAF1B", "CAF-1"], "previous_symbols": ["LOC107985297"], - "xrefs": ["ensembl:ENSG00000167670", "hgnc:1910"], - "associated_with": ["omim:601246"], + "xrefs": [ + "ensembl:ENSG00000167670", + "hgnc:1910", + "omim:601246", + ], "gene_type": "protein-coding", } ] @@ -867,8 +869,7 @@ def normalize_unmerged_ache(): ], "aliases": ["YT", "ARACHE", "ACEE", "N-ACHE"], "previous_symbols": ["ACEE"], - "xrefs": ["hgnc:108", "ensembl:ENSG00000087085"], - "associated_with": ["omim:100740"], + "xrefs": ["hgnc:108", "ensembl:ENSG00000087085", "omim:100740"], "gene_type": "protein-coding", } ], @@ -897,7 +898,6 @@ def normalize_unmerged_ache(): "aliases": [], "previous_symbols": [], "xrefs": ["hgnc:108"], - "associated_with": [], "gene_type": "protein_coding", } ] @@ -923,8 +923,9 @@ def normalize_unmerged_ache(): ], "aliases": ["3.1.1.7"], "previous_symbols": ["YT"], - "xrefs": ["ncbigene:43", "ensembl:ENSG00000087085"], - "associated_with": [ + "xrefs": [ + "ncbigene:43", + "ensembl:ENSG00000087085", "ucsc:uc003uxi.4", "vega:OTTHUMG00000157033", "merops:S09.979", @@ -1050,7 +1051,6 @@ def compare_unmerged_record(gene, test_gene): assert set(gene.xrefs) == set(test_gene.xrefs) assert gene.symbol_status == test_gene.symbol_status assert set(gene.previous_symbols) == set(test_gene.previous_symbols) - assert set(gene.associated_with) == set(test_gene.associated_with) assert gene.symbol == test_gene.symbol assert len(gene.locations) == len(test_gene.locations) for loc in gene.locations: @@ -1259,7 +1259,7 @@ def test_ache_query(query_handler, num_sources, normalized_ache, source_meta): compare_normalize_resp( resp, q, - MatchType.ASSOCIATED_WITH, + MatchType.XREF, normalized_ache, expected_source_meta=source_meta, ) @@ -1337,7 +1337,7 @@ def test_braf_query(query_handler, num_sources, normalized_braf, source_meta): compare_normalize_resp( resp, q, - MatchType.ASSOCIATED_WITH, + MatchType.XREF, normalized_braf, expected_source_meta=source_meta, ) @@ -1439,7 +1439,7 @@ def test_abl1_query(query_handler, num_sources, normalized_abl1, source_meta): compare_normalize_resp( resp, q, - MatchType.ASSOCIATED_WITH, + MatchType.XREF, normalized_abl1, expected_source_meta=source_meta, ) @@ -1572,18 +1572,14 @@ def test_normalize_unmerged( resp = query_handler.normalize_unmerged(q) compare_unmerged_response(resp, q, [], MatchType.ALIAS, normalize_unmerged_chaf1a) - # assoc with + # xref q = "omim:100740" resp = query_handler.normalize_unmerged(q) - compare_unmerged_response( - resp, q, [], MatchType.ASSOCIATED_WITH, normalize_unmerged_ache - ) + compare_unmerged_response(resp, q, [], MatchType.XREF, normalize_unmerged_ache) q = "uniprot:Q13111" resp = query_handler.normalize_unmerged(q) - compare_unmerged_response( - resp, q, [], MatchType.ASSOCIATED_WITH, normalize_unmerged_chaf1a - ) + compare_unmerged_response(resp, q, [], MatchType.XREF, normalize_unmerged_chaf1a) def test_invalid_queries(query_handler): diff --git a/tests/unit/test_schemas.py b/tests/unit/test_schemas.py index 3d5fceed..afe56b84 100644 --- a/tests/unit/test_schemas.py +++ b/tests/unit/test_schemas.py @@ -78,15 +78,6 @@ def test_gene(gene, sequence_location): xrefs=["hgnc", "hgnc:1"], ) - # associated_with not a valid curie - with pytest.raises(pydantic.ValidationError): - Gene( - match_type=100, - concept_id="hgnc:1096", - symbol="BRAF", - associated_with=["hgnc", "hgnc:1"], - ) - # symbol status invalid with pytest.raises(pydantic.ValidationError): Gene(