Skip to content

Commit

Permalink
Merge branch 'main' into precommit-version
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Mar 21, 2024
2 parents a7a9188 + ec7502d commit 35e457d
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 43 deletions.
34 changes: 31 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
# agct: Another Genome Conversion Tool

Drop-in replacement for the [pyliftover](https://github.com/konstantint/pyliftover) tool, using the St. Jude's [chainfile](https://docs.rs/chainfile/latest/chainfile/) crate. Enables significantly faster chainfile loading from cold start (see `analysis/`).
[![image](https://img.shields.io/pypi/v/agct.svg)](https://pypi.python.org/pypi/agct)
[![image](https://img.shields.io/pypi/l/agct.svg)](https://pypi.python.org/pypi/agct)
[![image](https://img.shields.io/pypi/pyversions/agct.svg)](https://pypi.python.org/pypi/agct)
[![Actions status](https://github.com/genomicmedlab/agct/workflows/CI/badge.svg)](https://github.com/genomicmedlab/agct/actions)

Status: alpha.
<!-- description -->
A drop-in replacement for the [pyliftover](https://github.com/konstantint/pyliftover) tool, built on St. Jude's [chainfile](https://docs.rs/chainfile/latest/chainfile/) crate. Enables significantly faster chainfile loading from cold start (see `analysis/`).
<!-- /description -->

## Installation

Install from [PyPI](https://pypi.org/project/agct/):

```shell
python3 -m pip install agct
```

## Usage

Expand All @@ -13,11 +26,13 @@ from agct import Converter
c = Converter("hg38", "hg19")
```

> If a chainfile is unavailable locally, it's downloaded from UCSC and saved using the `wags-tails` package -- see the [configuration instructions](https://github.com/GenomicMedLab/wags-tails?tab=readme-ov-file#configuration) for information on how to designate a non-default storage location.
Call ``convert_coordinate()``:

```python3
c.convert_coordinate("chr7", 140453136, "+")
# [['chr7', '140152936', '+']]
# [['chr7', 140152936, '+']]
```

## Development
Expand All @@ -44,6 +59,19 @@ This installs Python code as editable, but after any changes to Rust code, ``mat
maturin develop
```

Format and lint Python code with `ruff` (this reformats files and applies auto-fixes in place):

```shell
python3 -m ruff format . && python3 -m ruff check --fix .
```

Use `cargo fmt` to format Rust code (must be run from within the `rust/` subdirectory):

```shell
cd rust/
cargo fmt
```

Run tests with `pytest`:

```shell
Expand Down
78 changes: 60 additions & 18 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
[project]
name = "agct"
version = "0.1.0-dev0"
version = "0.1.0-dev1"
authors = [
{name = "James Stevenson"}
{name = "James Stevenson"},
{name = "Kori Kuzma"},
]
description = "Another Genome Conversion Tool: Python frontend to Rust chainfile crate"
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.8"
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Rust",
Expand All @@ -18,10 +16,14 @@ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
]
requires-python = ">=3.8"
description = "Another Genome Conversion Tool: Python frontend to Rust chainfile crate"
license = {file = "LICENSE"}
dependencies = ["wags-tails"]

[project.optional-dependencies]
Expand All @@ -31,7 +33,7 @@ tests = [
]
dev = [
"maturin",
"ruff>=0.1.12",
"ruff==0.2.0",
"pre-commit",
]

Expand Down Expand Up @@ -60,15 +62,51 @@ branch = true

[tool.ruff]
src = ["src"]
# pycodestyle (E, W)
# Pyflakes (F)
# flake8-annotations (ANN)
# pydocstyle (D)
# pep8-naming (N)
# isort (I)
select = ["E", "W", "F", "ANN", "D", "N", "I"]
fixable = ["I", "F401"]

[tool.ruff.lint]
select = [
"F", # https://docs.astral.sh/ruff/rules/#pyflakes-f
"E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
"I", # https://docs.astral.sh/ruff/rules/#isort-i
"N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n
"D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d
"UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
"ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann
"ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async
"S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s
"B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
"A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a
"C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
"DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
"T10", # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10
"EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
"G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
"PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
"Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
"RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
"PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
]
fixable = [
"I",
"F401",
"D",
"UP",
"ANN",
"B",
"C4",
"G",
"PIE",
"PT",
"RSE",
"SIM",
"RUF"
]
# ANN101 - missing-type-self
# ANN003 - missing-type-kwargs
# D203 - one-blank-line-before-class
Expand All @@ -83,16 +121,20 @@ fixable = ["I", "F401"]
# E117 - over-indented*
# E501 - line-too-long*
# W191 - tab-indentation*
# S321 - suspicious-ftp-lib-usage
# *ignored for compatibility with formatter
ignore = [
"ANN101", "ANN003",
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
"E111", "E114", "E117", "E501",
"W191"
"W191",
"S321",
]

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
# ANN001 - missing-type-function-argument
# ANN102 - missing-type-cls
# ANN2 - missing-return-type
"tests/*" = ["ANN001", "ANN102", "ANN2"]
# ANN102 - missing-type-cls
# S101 - assert
# B011 - assert-false
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"]
60 changes: 38 additions & 22 deletions src/agct/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from enum import Enum
from pathlib import Path
from typing import Callable, List, Tuple
from typing import Callable, List, Optional, Tuple

from wags_tails import CustomData
from wags_tails.utils.downloads import download_http, handle_gzip
Expand Down Expand Up @@ -36,35 +36,51 @@ class Converter:
association.
"""

def __init__(self, from_db: Genome, to_db: Genome) -> None:
def __init__(
self,
from_db: Optional[Genome] = None,
to_db: Optional[Genome] = None,
chainfile: Optional[str] = None,
) -> None:
"""Initialize liftover instance.
:param from_db: database name, e.g. ``"19"``
:param to_db: database name, e.g. ``"38"``
:param from_db: database name, e.g. ``"hg19"``. Must be different from ``to_db``.
If ``chainfile`` is provided, this argument is ignored.
:param to_db: database name, e.g. ``"hg38"``. Must be different from ``from_db``.
If ``chainfile`` is provided, this argument is ignored.
:param chainfile: Path to chainfile.
If not provided, both ``from_db`` and ``to_db`` must be given so that
``wags-tails`` can download the corresponding chainfile.
:raise ValueError: if required arguments are not passed or are invalid
:raise FileNotFoundError: if unable to open corresponding chainfile
:raise _core.ChainfileError: if unable to read chainfile (i.e. it's invalid)
"""
if from_db == to_db:
raise ValueError("Liftover must be to/from different sources.")
if not isinstance(from_db, Genome):
from_db = Genome(from_db)
if not isinstance(to_db, Genome):
to_db = Genome(to_db)
data_handler = CustomData(
f"chainfile_{from_db.value}_to_{to_db.value}",
"chain",
lambda: "",
self._download_function_builder(from_db, to_db),
data_dir=get_data_dir() / "ucsc-chainfile",
)
file, _ = data_handler.get_latest()
if not chainfile:
    # Without an explicit chainfile, BOTH genomes are required to select (and,
    # if necessary, download) one. Supplying only one of them is just as
    # unusable as supplying neither, so reject either being absent.
    if from_db is None or to_db is None:
        msg = "Must provide both `from_db` and `to_db`"
        raise ValueError(msg)

    # Accept plain string values (e.g. ``"hg19"``) as well as ``Genome``
    # members; an unrecognized value raises ``ValueError`` from the enum lookup.
    # Coerce before comparing so the equality check is between enum members.
    if not isinstance(from_db, Genome):
        from_db = Genome(from_db)
    if not isinstance(to_db, Genome):
        to_db = Genome(to_db)

    if from_db == to_db:
        msg = "Liftover must be to/from different sources."
        raise ValueError(msg)

    # Resolve the chainfile via wags-tails under the ``ucsc-chainfile`` data
    # directory, using the download function built for this genome pair.
    data_handler = CustomData(
        f"chainfile_{from_db.value}_to_{to_db.value}",
        "chain",
        lambda: "",
        self._download_function_builder(from_db, to_db),
        data_dir=get_data_dir() / "ucsc-chainfile",
    )
    file, _ = data_handler.get_latest()
    chainfile = str(file.absolute())

try:
self._converter = _core.Converter(str(file.absolute()))
self._converter = _core.Converter(chainfile)
except FileNotFoundError as e:
_logger.error("Unable to open chainfile located at %s", file.absolute())
_logger.error("Unable to open chainfile located at %s", chainfile)
raise e
except _core.ChainfileError as e:
_logger.error("Error reading chainfile located at %s", file.absolute())
_logger.error("Error reading chainfile located at %s", chainfile)
raise e

@staticmethod
Expand Down Expand Up @@ -104,7 +120,7 @@ def convert_coordinate(
c = Converter("hg19", "hg38")
c.convert_coordinate("chr7", 140453136, Strand.POSITIVE)
# returns [['chr7', '140753336', '+']]
# returns [['chr7', 140753336, '+']]
:param chrom: chromosome name as given in chainfile. Usually e.g. ``"chr7"``.
Expand Down
21 changes: 21 additions & 0 deletions tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Module for testing Converter initialization"""
import pytest
from tests.conftest import DATA_DIR

from agct import Converter, Genome


def test_valid():
    """Check that a Converter can be built directly from a local chainfile path."""
    chainfile_path = DATA_DIR / "ucsc-chainfile" / "chainfile_hg19_to_hg38_.chain"
    converter = Converter(chainfile=str(chainfile_path))
    assert converter


def test_invalid():
    """Check that unusable constructor arguments are rejected with ``ValueError``."""
    # Neither a chainfile nor any genome was supplied.
    missing_genomes = pytest.raises(
        ValueError, match="Must provide both `from_db` and `to_db`"
    )
    with missing_genomes:
        Converter()

    # Source and destination genome are the same.
    identical_genomes = pytest.raises(
        ValueError, match="Liftover must be to/from different sources."
    )
    with identical_genomes:
        Converter(Genome.HG19, Genome.HG19)

0 comments on commit 35e457d

Please sign in to comment.