diff --git a/.gitignore b/.gitignore index da7e4be..15d4d1f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ null/ co2footprint* .nf-test/ .nf-test.log +.vscode diff --git a/README.md b/README.md index e33bf0c..9499106 100644 --- a/README.md +++ b/README.md @@ -2,49 +2,53 @@ ## Introduction -**sanger-tol/longreadmag** is a bioinformatics pipeline that ... - - +**sanger-tol/longreadmag** is a bioinformatics pipeline for the assembly and binning of metagenomes +using PacBio HiFi data and (optionally) Hi-C Illumina data. - -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Assembles raw reads using [metaMDBG](https://github.com/GaetanBenoitDev/metaMDBG) +2. Maps HiFi and (optionally) Hi-C reads to the assembly +3. Bins the assembly using MetaBat2, MaxBin2, Bin3C, and Metator +4. (Optionally) refines the bins using DAS_Tool or MagScoT +5. Assesses the completeness and contamination of bins using CheckM2 and their ncRNA content using Prokka +6. Assigns taxonomy to medium-quality and above bins using GTDB-Tk +7. Summarises information at the bin level ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run sanger-tol/longreadmag \ -profile \ - --input samplesheet.csv \ + --input input.yaml \ --outdir ``` @@ -55,9 +59,7 @@ nextflow run sanger-tol/longreadmag \ sanger-tol/longreadmag was originally written by Jim Downie, Will Eagles, Noah Gettle. 
-We thank the following people for their extensive assistance in the development of this pipeline: - - + ## Contributions and Support diff --git a/conf/test_full.config b/conf/test_full.config index d2e3078..211955f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,6 +15,5 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // Input data - input = "${baseDir}/assets/test_full_input.yaml" + input = params.pipelines_testdata_base_path + "test_input_full.yaml" } diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 35dca70..75e343b 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -1,7 +1,7 @@ #!/usr/bin/env nextflow def readYAML( yamlfile ) { - return new org.yaml.snakeyaml.Yaml().load( new FileReader( yamlfile.toString() ) ) + return new org.yaml.snakeyaml.Yaml().load(yamlfile.text) } workflow YAML_INPUT { @@ -10,9 +10,8 @@ workflow YAML_INPUT { main: // ch_versions = Channel.empty() - - yamlfile = Channel.from(input_file) - .map { file -> readYAML(file) } + yamlfile = Channel.fromPath(input_file) + | map { file -> readYAML(file) } // // LOGIC: PARSES THE TOP LEVEL OF YAML VALUES @@ -21,11 +20,11 @@ workflow YAML_INPUT { | flatten() | multiMap { data -> pacbio_fasta: ( data.pacbio ? [ - [ id: data.tolid ], + [ id: data.id ], data.pacbio.fasta.collect { file(it, checkIfExists: true) } ] : error("ERROR: Pacbio reads not provided! Pipeline will not run as there is nothing to do.") ) hic_cram: ( data.hic ? [ - [ id: data.tolid ], + [ id: data.id ], data.hic.cram.collect { file(it, checkIfExists: true) } ] : [] ) hic_enzymes: ( data.hic ? 
diff --git a/tests/main.nf.test b/tests/main.nf.test index 3bdaaca..0456650 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -8,6 +8,7 @@ nextflow_pipeline { when { params { outdir = "${outputDir}" + enable_gtdbtk = false } } @@ -22,7 +23,7 @@ nextflow_pipeline { // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnaseq_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/sangertol_longreadmag_pipeline_software_mqc_versions.yml"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap deleted file mode 100644 index 1ad6480..0000000 --- a/tests/main.nf.test.snap +++ /dev/null @@ -1,19 +0,0 @@ -{ - "Params: default": { - "content": [ - 0, - null, - [ - "pipeline_info" - ], - [ - "params_2024-12-20_11-18-21.json:md5,948745d6c0c12ac3ead0100ae3d904c6" - ] - ], - "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-20T11:18:22.153429" - } -} \ No newline at end of file diff --git a/workflows/longreadmag.nf b/workflows/longreadmag.nf index b0110f5..d26b047 100644 --- a/workflows/longreadmag.nf +++ b/workflows/longreadmag.nf @@ -150,7 +150,7 @@ workflow LONGREADMAG { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'longreadmag_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + name: 'sangertol_longreadmag_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true ).set { ch_collated_versions }