Skip to content

Commit

Permalink
feat: use wags-tails for data management (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jan 12, 2024
1 parent be2c96b commit d71bb17
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
]
dependencies = []
dependencies = ["wags-tails"]

[project.optional-dependencies]
test = [
Expand Down
46 changes: 7 additions & 39 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,12 @@
//! Provide Rust-based chainfile wrapping classes.
use chain::core::{Coordinate, Interval, Strand};
use chainfile as chain;
use directories::BaseDirs;
use pyo3::exceptions::PyValueError;
use pyo3::exceptions::{PyFileNotFoundError, PyValueError};
use pyo3::prelude::*;
use std::env;
use std::fs;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;

/// Determine the directory that holds ChainLifter chainfiles.
///
/// The value of the `CHAINLIFTER_DATA_DIR` environment variable is used when it
/// can be read. Otherwise the directory is `<home>/.local/share/chainlifter`,
/// where `<home>` is the home directory reported by `BaseDirs`.
///
/// # Panics
///
/// Panics if the environment variable cannot be read and `BaseDirs::new()`
/// returns `None`.
fn get_chainfile_dir() -> String {
    match env::var("CHAINLIFTER_DATA_DIR") {
        Ok(configured_dir) => configured_dir,
        Err(_) => match BaseDirs::new() {
            Some(dirs) => {
                let home = dirs.home_dir();
                format!("{}/.local/share/chainlifter", home.display())
            }
            None => panic!("Unable to get ChainLifter data directory."),
        },
    }
}

/// Locate the chainfile used to lift from `from_db` to `to_db`.
///
/// Makes sure the chainfile directory exists (creating it when necessary) and
/// then looks for `hg{from_db}ToHg{to_db}.over.chain` inside it. The path is
/// returned when the file is present; otherwise a placeholder string that is
/// not a usable path is returned.
///
/// TODO: fetch from remote if not available locally, probably via config
/// TODO: throw exceptions if unable to acquire
/// TODO: specify base dir
///
/// # Panics
///
/// Panics if the chainfile directory cannot be created.
fn get_chainfile(from_db: &str, to_db: &str) -> String {
    let chainfile_dir = get_chainfile_dir();
    fs::create_dir_all(&chainfile_dir).unwrap();

    let candidate = format!("{}/hg{}ToHg{}.over.chain", chainfile_dir, from_db, to_db);
    if !Path::new(&candidate).exists() {
        return "this isn't going to work".to_string();
    }
    candidate
}

/// Define core ChainLifter class to be used by Python interface.
/// Effectively just a wrapper on top of the chainfile crate's Machine struct.
#[pyclass]
Expand All @@ -51,14 +17,16 @@ pub struct ChainLifter {
#[pymethods]
impl ChainLifter {
/// Construct a `ChainLifter` by reading a chainfile and building the liftover
/// machine from it.
#[new]
// NOTE(review): this span is rendered from a diff and appears to interleave two
// versions of the constructor. The next three lines look like the pre-commit
// variant: it took source/target database names, resolved the file path via
// `get_chainfile`, and panicked (`unwrap`) if the file could not be opened.
pub fn new(from_db: &str, to_db: &str) -> ChainLifter {
let chainfile_name: String = get_chainfile(from_db, to_db);
let data = BufReader::new(File::open(&chainfile_name).unwrap());
// Post-commit variant (presumably): the caller passes the chainfile path
// directly and the constructor is fallible, returning a `PyResult`.
pub fn new(chainfile_path: &str) -> PyResult<ChainLifter> {
// A missing path is reported as a Python-facing `PyFileNotFoundError` rather than a panic.
if !Path::new(&chainfile_path).exists() {
return Err(PyFileNotFoundError::new_err("Chainfile doesn't exist"));
}
// NOTE(review): `File::open` is still unwrapped, so any open failure other than
// the missing-path case checked above will panic here instead of returning `Err`.
let data = BufReader::new(File::open(&chainfile_path).unwrap());
let reader = chain::Reader::new(data);
// NOTE(review): a failure of `try_build_from` also panics via `unwrap`; consider
// mapping it to an error return (e.g. the imported `PyValueError`) — confirm intent.
let machine = chain::liftover::machine::Builder::default()
.try_build_from(reader)
.unwrap();
// Return expression of the infallible (apparently pre-commit) signature.
ChainLifter { machine }
// Return expression of the `PyResult` signature.
Ok(ChainLifter { machine })
}

/// Perform liftover
Expand Down
42 changes: 40 additions & 2 deletions src/chainlifter/lifter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
"""Perform chainfile-driven liftover."""
from enum import Enum
from pathlib import Path
from typing import Callable

from wags_tails import CustomData
from wags_tails.utils.downloads import download_http, handle_gzip
from wags_tails.utils.storage import get_data_dir

import chainlifter._core as _core

Expand All @@ -22,7 +28,39 @@ def __init__(self, from_db: str, to_db: str) -> None:
:param from_db: database name, e.g. ``"hg19"``
:param to_db: database name, e.g. ``"hg38"``
"""
self._chainlifter = _core.ChainLifter(from_db, to_db)
data_handler = CustomData(
f"chainfile_{from_db}_to_{to_db}",
"chain",
lambda: "",
self._download_function_builder(from_db, to_db),
data_dir=get_data_dir() / "ucsc-chainfile",
)
file, _ = data_handler.get_latest()
self._chainlifter = _core.ChainLifter(str(file.absolute()))

@staticmethod
def _download_function_builder(from_db: str, to_db: str) -> Callable:
"""Build downloader function for chainfile corresponding to source/destination
params.
Wags-Tails' custom data handler takes a downloader callback function. We
construct it here, curried with from/to values in the download URL.
:param from_db: genome lifting from
:param to_db: genome lifting to
:return: Function that downloads appropriate chainfile from UCSC
"""

def _download_data(version: str, file: Path) -> None:
"""Download and gunzip chainfile from UCSC.
:param version: not used
:param file: path to save file to
"""
url = f"https://hgdownload.soe.ucsc.edu/goldenPath/{from_db}/liftOver/{from_db}To{to_db.title()}.over.chain.gz"
download_http(url, file, handler=handle_gzip)

return _download_data

def convert_coordinate(
self, chrom: str, pos: int, strand: Strand = Strand.POSITIVE
Expand All @@ -35,7 +73,7 @@ def convert_coordinate(
from chainlifter.lifter import ChainLifter, Strand
lifter = ChainLifter("19", "38")
lifter = ChainLifter("hg19", "hg38")
lifter.convert_coordinate("chr7", 140453136, Strand.POSITIVE)
# returns [['chr7', '140753336', '+']]
Expand Down

0 comments on commit d71bb17

Please sign in to comment.