diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f56ea13..3f47ff8 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -103,7 +103,7 @@ jobs: runs-on: ubuntu-latest environment: name: pypi - url: https://pypi.org/p/chainlifter + url: https://pypi.org/p/agct permissions: id-token: write # IMPORTANT: mandatory for trusted publishing if: "startsWith(github.ref, 'refs/tags/')" diff --git a/README.md b/README.md index bc0246c..d513810 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,22 @@ -Drop-in replacement for the [pyliftover](https://github.com/konstantint/pyliftover) tool. Name forthcoming. +# agct: Another Genome Conversion Tool -Status: very, very preliminary. +Drop-in replacement for the [pyliftover](https://github.com/konstantint/pyliftover) tool, using the St. Jude's [chainfile](https://docs.rs/chainfile/latest/chainfile/) crate. Enables significantly faster chainfile loading from cold start (see `analysis/`). + +Status: alpha. ## Usage Initialize a class instance: ```python3 -from chainlifter.lifter import ChainLifter -ch = ChainLifter("hg38", "hg19") +from agct import Converter +c = Converter("hg38", "hg19") ``` Call ``convert_coordinate()``: ```python3 -ch.convert_coordinate("chr7", 140453136, "+") +c.convert_coordinate("chr7", 140453136, "+") # [['chr7', '140152936', '+']] ``` diff --git a/analysis/speed_test.ipynb b/analysis/speed_test.ipynb index e84d6e0..deb7d6e 100644 --- a/analysis/speed_test.ipynb +++ b/analysis/speed_test.ipynb @@ -16,7 +16,7 @@ "outputs": [], "source": [ "from pyliftover import LiftOver \n", - "from chainlifter.lifter import ChainLifter" + "from agct import Converter" ] }, { @@ -45,7 +45,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.33 s ± 161 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "1.11 s ± 26.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -64,13 +64,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "234 ms ± 7.96 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "217 ms ± 9.13 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%%timeit\n", - "ch = ChainLifter(\"hg38\", \"hg19\")" + "converter = Converter(\"hg38\", \"hg19\")" ] }, { @@ -91,7 +91,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.2 s ± 27.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "1.09 s ± 14.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -111,14 +111,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "233 ms ± 5.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "215 ms ± 6.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%%timeit\n", - "ch = ChainLifter(\"hg38\", \"hg19\")\n", - "ch.convert_coordinate(\"chr5\", 1404391, \"+\")" + "converter = Converter(\"hg38\", \"hg19\")\n", + "converter.convert_coordinate(\"chr5\", 1404391, \"+\")" ] }, { @@ -138,7 +138,7 @@ "source": [ "# load beforehand\n", "pyl = LiftOver(\"hg38\", \"hg19\")\n", - "ch = ChainLifter(\"hg38\", \"hg19\")" + "converter = Converter(\"hg38\", \"hg19\")" ] }, { @@ -151,7 +151,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2.07 µs ± 67.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + "1.97 µs ± 72.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], @@ -170,13 +170,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.97 µs ± 12.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)\n" + "2.77 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], "source": [ "%%timeit\n", - "ch.convert_coordinate(\"chr5\", 1404391, \"+\")" + "converter.convert_coordinate(\"chr5\", 1404391, \"+\")" ] }, { @@ -205,7 +205,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "335 ms ± 16.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "303 ms ± 13.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -224,13 +224,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "63.2 ms ± 773 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + "62.6 ms ± 2.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit\n", - "ch = ChainLifter(\"hg19\", \"hg38\")" + "converter = Converter(\"hg19\", \"hg38\")" ] }, { @@ -251,7 +251,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "321 ms ± 6.01 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "318 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -271,14 +271,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "63.5 ms ± 806 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + "57.8 ms ± 742 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit\n", - "ch = ChainLifter(\"hg19\", \"hg38\")\n", - "ch.convert_coordinate(\"chr5\", 1404391, \"+\")" + "converter = Converter(\"hg19\", \"hg38\")\n", + "converter.convert_coordinate(\"chr5\", 1404391, \"+\")" ] }, { @@ -298,7 +298,7 @@ "source": [ "# load beforehand\n", "pyl = LiftOver(\"hg19\", \"hg38\")\n", - "ch = ChainLifter(\"hg19\", \"hg38\")" + "converter = Converter(\"hg19\", \"hg38\")" ] }, { @@ -311,7 +311,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2.02 µs ± 11.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + "2.16 µs ± 232 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)\n" ] } ], @@ -330,13 +330,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "2.02 µs ± 56 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + "2.87 µs ± 65 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], "source": [ "%%timeit\n", - "ch.convert_coordinate(\"chr5\", 1404391, \"+\")" + "converter.convert_coordinate(\"chr5\", 1404391, \"+\")" ] } ], diff --git a/pyproject.toml b/pyproject.toml index e4c4122..080d65b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,16 @@ [project] -name = "chainlifter" +name = "agct" version = "0.1.0" authors = [ {name = "James Stevenson"} ] -description = "Python frontend to Rust chainfile crate" +description = "Another Genome Conversion Tool: Python frontend to Rust chainfile crate" readme = "README.md" license = {file = "LICENSE"} requires-python = ">=3.8" classifiers = [ "Development Status :: 3 - Alpha", "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", @@ -38,11 +36,11 @@ dev = [ ] [project.urls] -Homepage = "https://github.com/genomicmedlab/chainlifter" -Documentation = "https://github.com/genomicmedlab/chainlifter" -Changelog = "https://github.com/genomicmedlab/chainlifter/releases" -Source = "https://github.com/genomicmedlab/chainlifter" -"Bug Tracker" = "https://github.com/genomicmedlab/chainlifter/issues" +Homepage = "https://github.com/genomicmedlab/agct" +Documentation = "https://github.com/genomicmedlab/agct" +Changelog = "https://github.com/genomicmedlab/agct/releases" +Source = "https://github.com/genomicmedlab/agct" +"Bug Tracker" = "https://github.com/genomicmedlab/agct/issues" [build-system] requires = ["maturin>=1.2,<2.0"] @@ -50,7 +48,7 @@ build-backend = "maturin" [tool.maturin] features = ["pyo3/extension-module"] -module-name = "chainlifter._core" +module-name = "agct._core" python-source = "src" [tool.pytest.ini_options] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index a27190f..7c165d9 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "chainlifter" +name = "agct" version = "0.1.0" edition = "2021" [lib] -name = "chainlifter" +name = "agct" crate-type = ["cdylib"] [dependencies] diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 9f58d19..ddb9eed 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -7,21 +7,21 @@ use pyo3::prelude::*; use std::fs::File; use std::io::BufReader; -create_exception!(chainlifter, NoLiftoverError, PyException); -create_exception!(chainlifter, ChainfileError, PyException); -create_exception!(chainlifter, StrandValueError, PyException); +create_exception!(agct, NoLiftoverError, PyException); +create_exception!(agct, ChainfileError, PyException); +create_exception!(agct, StrandValueError, PyException); -/// Define core ChainLifter class to be used by Python interface. +/// Define core Converter class to be used by Python interface. /// Effectively just a wrapper on top of the chainfile crate's Machine struct. #[pyclass] -pub struct ChainLifter { +pub struct Converter { pub machine: chain::liftover::machine::Machine, } #[pymethods] -impl ChainLifter { +impl Converter { #[new] - pub fn new(chainfile_path: &str) -> PyResult { + pub fn new(chainfile_path: &str) -> PyResult { let Ok(chainfile_file) = File::open(chainfile_path) else { return Err(PyFileNotFoundError::new_err(format!( "Unable to open chainfile located at \"{}\"", @@ -36,7 +36,7 @@ impl ChainLifter { &chainfile_path ))); }; - Ok(ChainLifter { machine }) + Ok(Converter { machine }) } /// Perform liftover @@ -83,11 +83,11 @@ impl ChainLifter { } } -/// ChainLifter Python module. Collect Python-facing methods. +/// agct._core Python module. Collect Python-facing methods. #[pymodule] #[pyo3(name = "_core")] -fn chainlifter(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - m.add_class::()?; +fn agct(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + m.add_class::()?; m.add("NoLiftoverError", _py.get_type::())?; m.add("ChainfileError", _py.get_type::())?; m.add("StrandValueError", _py.get_type::())?; diff --git a/src/agct/__init__.py b/src/agct/__init__.py new file mode 100644 index 0000000..cdb2f06 --- /dev/null +++ b/src/agct/__init__.py @@ -0,0 +1,4 @@ +"""Provide fast liftover in Python via the ``chainfile`` crate.""" +from agct.converter import Converter, Genome, Strand + +__all__ = ["Converter", "Strand", "Genome"] diff --git a/src/chainlifter/lifter.py b/src/agct/converter.py similarity index 92% rename from src/chainlifter/lifter.py rename to src/agct/converter.py index db4dffa..e7567cd 100644 --- a/src/chainlifter/lifter.py +++ b/src/agct/converter.py @@ -8,7 +8,7 @@ from wags_tails.utils.downloads import download_http, handle_gzip from wags_tails.utils.storage import get_data_dir -import chainlifter._core as _core +import agct._core as _core _logger = logging.getLogger(__name__) @@ -31,7 +31,7 @@ class Genome(str, Enum): HG19 = "hg19" -class ChainLifter: +class Converter: """Chainfile-based liftover provider for a single sequence to sequence association. """ @@ -59,7 +59,7 @@ def __init__(self, from_db: Genome, to_db: Genome) -> None: ) file, _ = data_handler.get_latest() try: - self._chainlifter = _core.ChainLifter(str(file.absolute())) + self._converter = _core.Converter(str(file.absolute())) except FileNotFoundError as e: _logger.error("Unable to open chainfile located at %s", file.absolute()) raise e @@ -100,10 +100,10 @@ def convert_coordinate( .. code-block:: python - from chainlifter.lifter import ChainLifter, Strand + from agct import Converter, Strand - lifter = ChainLifter("hg19", "hg38") - lifter.convert_coordinate("chr7", 140453136, Strand.POSITIVE) + c = Converter("hg19", "hg38") + c.convert_coordinate("chr7", 140453136, Strand.POSITIVE) # returns [['chr7', '140753336', '+']] @@ -113,7 +113,7 @@ def convert_coordinate( :return: list of coordinate matches (possibly empty) """ try: - results = self._chainlifter.lift(chrom, pos, strand) + results = self._converter.lift(chrom, pos, strand) except _core.NoLiftoverError: results = [] except _core.ChainfileError: diff --git a/src/chainlifter/__init__.py b/src/chainlifter/__init__.py deleted file mode 100644 index cf49068..0000000 --- a/src/chainlifter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Provide fast liftover in Python via the ``chainfile`` crate.""" diff --git a/src/chainlifter/version.py b/src/chainlifter/version.py deleted file mode 100644 index 34c835c..0000000 --- a/src/chainlifter/version.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Define library version.""" -__version__ = "0.0.1" diff --git a/tests/test_liftover.py b/tests/test_liftover.py index 1e3dc8c..2ac1fdb 100644 --- a/tests/test_liftover.py +++ b/tests/test_liftover.py @@ -1,44 +1,44 @@ """Run liftover tests.""" -from chainlifter.lifter import ChainLifter, Genome, Strand +from agct import Converter, Genome, Strand def test_hg19_to_hg38(): """Test hg19 to hg38 lifter.""" - ch = ChainLifter(Genome.HG19, Genome.HG38) + converter = Converter(Genome.HG19, Genome.HG38) - result = ch.convert_coordinate("chr7", 140439611) + result = converter.convert_coordinate("chr7", 140439611) assert len(result) == 1 assert result[0] == ("chr7", 140739811, Strand.POSITIVE) - result = ch.convert_coordinate("chr7", 140439746) + result = converter.convert_coordinate("chr7", 140439746) assert len(result) == 1 assert result[0] == ("chr7", 140739946, Strand.POSITIVE) - result = ch.convert_coordinate("chr7", 140439703) + result = converter.convert_coordinate("chr7", 140439703) assert len(result) == 1 assert result[0] == ("chr7", 140739903, Strand.POSITIVE) - result = ch.convert_coordinate("chr7", 140453136) + result = converter.convert_coordinate("chr7", 140453136) assert len(result) == 1 assert result[0] == ("chr7", 140753336, Strand.POSITIVE) # coordinate exceeds bounds - result = ch.convert_coordinate("chr7", 14040053136) + result = converter.convert_coordinate("chr7", 14040053136) assert result == [] def test_hg38_to_hg19(): - "Test hg38 to hg19 lifter." "" - ch = ChainLifter(Genome.HG38, Genome.HG19) + """Test hg38 to hg19 lifter.""" + converter = Converter(Genome.HG38, Genome.HG19) - result = ch.convert_coordinate("chr7", 140739811) + result = converter.convert_coordinate("chr7", 140739811) assert len(result) == 1 assert result[0] == ("chr7", 140439611, Strand.POSITIVE) - result = ch.convert_coordinate("chr7", 140759820) + result = converter.convert_coordinate("chr7", 140759820) assert len(result) == 1 assert result[0] == ("chr7", 140459620, Strand.POSITIVE) - result = ch.convert_coordinate("chr7", 60878240) + result = converter.convert_coordinate("chr7", 60878240) assert len(result) == 1 assert result[0] == ("chr7", 61646115, Strand.POSITIVE) diff --git a/tests/test_rust_api.py b/tests/test_rust_api.py index 4ff8c0a..a8019d2 100644 --- a/tests/test_rust_api.py +++ b/tests/test_rust_api.py @@ -1,12 +1,12 @@ """Test some non-public aspects of the Rust layer.""" import pytest -from chainlifter._core import ChainfileError, ChainLifter +from agct._core import ChainfileError, Converter def test_open_chainfile_errors(data_dir): """Test chainfile opening/reading errors.""" with pytest.raises(FileNotFoundError): - ChainLifter(str(data_dir / "non_existent_chainfile.chain")) + Converter(str(data_dir / "non_existent_chainfile.chain")) with pytest.raises(ChainfileError): - ChainLifter(str(data_dir / "invalid_chainfile.chain")) + Converter(str(data_dir / "invalid_chainfile.chain"))