From 209d622b5a922c905ae3e63fbee7a6a5ef8d3c16 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 2 Oct 2023 16:29:00 +0100 Subject: [PATCH 1/8] Adding CHANGELOG --- CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dd167c..bbd73cc 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,46 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v1.0 - UNSC Infinity - [2023-10-02] Initial release of sanger-tol/curationpretext, created with the [sager-tol](https://nf-co.re/) template. ### `Added` +- Subworkflow to generate tracks containing telomeric sites. +- Subworkflow to generate Pretext maps and images +- Subworkflow to generate repeat density tracks. +- Subworkflow to generate longread coverage tracks from pacbio data. +- Subworkflow to generate gap tracks. -### `Fixed` +### Parameters -### `Dependencies` +### Software Dependencies -### `Deprecated` +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Module | Old Version | New Versions | +| -------------------------------------- | ----------- | ---------------- | +| bamtobed_sort ( bedtools + samtools ) | - | 2.31.0 + 1.17 | +| bedtools | - | 2.31.0 | +| cram_filter_align_bwamem2_fixmate_sort | - | | +| ^ ( samtools + bwamem2 ) ^ | - | 1.16.1 + 2.2.1 | +| extract_cov_id ( coreutils ) | - | 9.1 | +| extract_repeat ( perl ) | - | 5.26.2 | +| extract_telo ( coreutils ) | - | 9.1 | +| find_telomere_regions ( gcc ) | - | 7.1.0 | +| find_telomere_windows ( java-jdk ) | - | 8.0.112 | +| gap_length ( coreutils ) | - | 9.1 | +| generate_cram_csv ( samtools ) | - | 1.17 | +| get_largest_scaff ( coreutils ) | - | 9.1 | +| gnu-sort | - | 8.25 | +| pretextmap + samtools | - | 0.1.9 + 1.17 | +| seqtk | - | 1.4 | +| tabix | - | 1.11 | +| ucsc | - | 377 | +| windowmasker (blast) | - | 2.14.0 | + +### Fixed + +### Dependencies + +### Deprecated From 3fcf1bb04d762d25357e490745ed2db0823f4f90 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 2 Oct 2023 16:32:14 +0100 Subject: [PATCH 2/8] Adding CHANGELOG --- CHANGELOG.md | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd73cc..65e641e 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of sanger-tol/curationpretext, created with the [sager-tol](https://nf-co.re/) template. ### `Added` + - Subworkflow to generate tracks containing telomeric sites. - Subworkflow to generate Pretext maps and images - Subworkflow to generate repeat density tracks. @@ -20,26 +21,26 @@ Initial release of sanger-tol/curationpretext, created with the [sager-tol](http Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. -| Module | Old Version | New Versions | -| -------------------------------------- | ----------- | ---------------- | -| bamtobed_sort ( bedtools + samtools ) | - | 2.31.0 + 1.17 | -| bedtools | - | 2.31.0 | -| cram_filter_align_bwamem2_fixmate_sort | - | | -| ^ ( samtools + bwamem2 ) ^ | - | 1.16.1 + 2.2.1 | -| extract_cov_id ( coreutils ) | - | 9.1 | -| extract_repeat ( perl ) | - | 5.26.2 | -| extract_telo ( coreutils ) | - | 9.1 | -| find_telomere_regions ( gcc ) | - | 7.1.0 | -| find_telomere_windows ( java-jdk ) | - | 8.0.112 | -| gap_length ( coreutils ) | - | 9.1 | -| generate_cram_csv ( samtools ) | - | 1.17 | -| get_largest_scaff ( coreutils ) | - | 9.1 | -| gnu-sort | - | 8.25 | -| pretextmap + samtools | - | 0.1.9 + 1.17 | -| seqtk | - | 1.4 | -| tabix | - | 1.11 | -| ucsc | - | 377 | -| windowmasker (blast) | - | 2.14.0 | +| Module | Old Version | New Versions | +| -------------------------------------- | ----------- | -------------- | +| bamtobed_sort ( bedtools + samtools ) | - | 2.31.0 + 1.17 | +| bedtools | - | 2.31.0 | +| cram_filter_align_bwamem2_fixmate_sort | - | | +| ^ ( samtools + bwamem2 ) ^ | - | 1.16.1 + 2.2.1 | +| extract_cov_id ( coreutils ) | - | 9.1 | +| extract_repeat ( perl ) | - | 5.26.2 | +| extract_telo ( coreutils ) | - | 9.1 | +| find_telomere_regions ( gcc ) | - | 7.1.0 | +| find_telomere_windows ( java-jdk ) | - | 8.0.112 | +| gap_length ( coreutils ) | - | 9.1 | +| generate_cram_csv ( samtools ) | - | 1.17 | +| get_largest_scaff ( coreutils ) | - | 9.1 | +| gnu-sort | - | 8.25 | +| pretextmap + samtools | - | 0.1.9 + 1.17 | +| seqtk | - | 1.4 | +| tabix | - | 1.11 | +| ucsc | - | 377 | +| windowmasker (blast) | - | 2.14.0 | ### Fixed From ec7656e2d0a2fccb3495a05a73e1b4bca5a0b92a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 11:23:11 +0100 Subject: [PATCH 3/8] Correcting memory requirements in base --- conf/base.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index b25f0e7..891e2fe 100755 --- a/conf/base.config +++ b/conf/base.config @@ -21,12 +21,12 @@ process { withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } + memory = { check_max( 25.GB * task.attempt, 'memory' ) } } withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 80.GB * task.attempt, 'memory' ) } + memory = { check_max( 25.GB * task.attempt, 'memory' ) } } withName: PRETEXTMAP_STANDRD{ @@ -45,7 +45,7 @@ process { } withName: BWAMEM2_INDEX { - memory = { check_max( 100.GB * task.attempt, 'memory' ) } + memory = { check_max( 25.GB * task.attempt, 'memory' ) } } // Process-specific resource requirements From 07122fbc03291f1e5ca3809f5a490c56a0bbb0e3 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 11:24:42 +0100 Subject: [PATCH 4/8] Updating docs to add information on updating test.config for local use --- docs/usage.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 289bf12..f0cb0c3 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -19,6 +19,32 @@ The `--pacbio` should point to the folder containing `.fasta.gz` files. If you do not have these file formats we have also included instructions on converting from common formats to our preferred format. If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. +## Prior to running CurationPretext + +
+ Details + +We provide a complete set of data that can be used to test the pipeline locally. + +By default the test.config file is set up to run on GitHub, however, should you want to test this locally you can follow the below instructions. + +First, choose a download location `${PRETEXT_TEST_DATA}` and run this command (this assumes you are inside the curationpretext directory): + +``` +PRETEXT_TEST_DATA=$(pwd) +curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + +sed -i'' -e "s|/home/runner/work/curationpretext/curationpretext|${PRETEXT_TEST_DATA}|" conf/test.config +``` + +Then, you should be able to run the pipeline with: + +``` +nextflow run . -profile test,singularity +``` + +
+ ### HiC data Preparation
From 76d5dec5b6561fcd6df074a0f2b0c98e06fa41b2 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Mon, 9 Oct 2023 11:26:48 +0100 Subject: [PATCH 5/8] Update CHANGELOG.md Co-authored-by: Priyanka Surana --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65e641e..bca2ffe 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0 - UNSC Infinity - [2023-10-02] +## [[1.0.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0)] - UNSC Infinity - [2023-10-02] Initial release of sanger-tol/curationpretext, created with the [sager-tol](https://nf-co.re/) template. From 26dbbe88b5d3b51715aa3e9084f8e4781ca60096 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 11:31:19 +0100 Subject: [PATCH 6/8] adding parameters --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65e641e..d6b79e8 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,15 @@ Initial release of sanger-tol/curationpretext, created with the [sager-tol](http ### Parameters +| Old Version | New Versions | +| ----------- | -------------- | +| | --input | +| | --cram | +| | --pacbio | +| | --sample | +| | --teloseq | +| | -entry | + ### Software Dependencies Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. From 53b01efaf57ba9c275109d34b54a40b0b8a958da Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 11:32:31 +0100 Subject: [PATCH 7/8] remove quotes --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07e40af..d292f24 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of sanger-tol/curationpretext, created with the [sager-tol](https://nf-co.re/) template. -### `Added` +### Added - Subworkflow to generate tracks containing telomeric sites. - Subworkflow to generate Pretext maps and images From 8e70da9622fac9f48a118b21664a3e269ff0b400 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 11:33:21 +0100 Subject: [PATCH 8/8] linting --- CHANGELOG.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d292f24..7b47149 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,14 +17,14 @@ Initial release of sanger-tol/curationpretext, created with the [sager-tol](http ### Parameters -| Old Version | New Versions | -| ----------- | -------------- | -| | --input | -| | --cram | -| | --pacbio | -| | --sample | -| | --teloseq | -| | -entry | +| Old Version | New Versions | +| ----------- | ------------ | +| | --input | +| | --cram | +| | --pacbio | +| | --sample | +| | --teloseq | +| | -entry | ### Software Dependencies