diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06ac277d..b788565a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: run: | curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - - name: Singularity - Run FULL pipeline with test data + - name: Run FULL pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./Sing-Full diff --git a/CHANGELOG.md b/CHANGELOG.md index dd0869b0..6be0c8b6 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -118,6 +118,13 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Fix a bug in build_alignment_blocks.py to avoid indexing errors happening in large genomes. - Change output BEDGRAPH from EXTRACT_TELO module. +#### Hot Fix 1 + +- Generate CRAM CSV fix to allow for multi-readgroup cram files +- Removing KMER_READCOV +- tmp directory was being used +- Output file adjustment (names and location) + ### Parameters | Old Parameter | New Parameter | diff --git a/main.nf b/main.nf index 2a9fc377..2289d95c 100755 --- a/main.nf +++ b/main.nf @@ -23,6 +23,7 @@ WorkflowMain.initialise( workflow, params, log ) */ include { TREEVAL } from './workflows/treeval' +include { TREEVAL_JBROWSE } from './workflows/treeval_jbrowse' include { TREEVAL_RAPID } from './workflows/treeval_rapid' include { TREEVAL_RAPID_TOL } from './workflows/treeval_rapid_tol' @@ -33,6 +34,15 @@ workflow SANGERTOL_TREEVAL { TREEVAL () } + +// +// WORKFLOW: RUN MAIN PIPELINE ONLY THE JBROWSE COMPATIBLE COMPONENTS - E.G. NO MAPS +// +workflow SANGERTOL_TREEVAL_JBROWSE { + TREEVAL_JBROWSE () +} + + // // WORKFLOW: RUN TRUNCATED PIPELINE TO PRODUCE CONTACT MAPS AND PRETEXT ACCESSORIES // @@ -40,6 +50,7 @@ workflow SANGERTOL_TREEVAL_RAPID { TREEVAL_RAPID () } + // // WORKFLOW: RUN TRUNCATED PIPELINE, CONTAINS WORKFLOWS INTERNAL TO SANGERTOL // @@ -47,6 +58,7 @@ workflow SANGERTOL_TREEVAL_RAPID_TOL { TREEVAL_RAPID_TOL () } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -57,17 +69,25 @@ workflow SANGERTOL_TREEVAL_RAPID_TOL { // WORKFLOWS: Execute named workflow for the pipeline // workflow { - SANGERTOL_TREEVAL () + SANGERTOL_TREEVAL () +} + + +workflow JBROWSE { + SANGERTOL_TREEVAL_JBROWSE () } + workflow RAPID { - SANGERTOL_TREEVAL_RAPID () + SANGERTOL_TREEVAL_RAPID () } + workflow RAPID_TOL { - SANGERTOL_TREEVAL_RAPID_TOL () + SANGERTOL_TREEVAL_RAPID_TOL () } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/subworkflows/local/busco_annotation.nf b/subworkflows/local/busco_annotation.nf index a3227a1b..b3a4e972 100755 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -46,7 +46,6 @@ workflow BUSCO_ANNOTATION { [] ) ch_versions = ch_versions.mix(BUSCO_BUSCO.out.versions.first()) - ch_grab = GrabFiles(BUSCO_BUSCO.out.busco_dir) // diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index bd30e0df..200b4227 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -85,6 +85,7 @@ workflow HIC_MAPPING { } .set{ch_aligner} + // // SUBWORKFLOW: mapping hic reads using minimap2 // @@ -96,6 +97,7 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mergedbam = HIC_MINIMAP2.out.mergedbam + // // SUBWORKFLOW: mapping hic reads using bwamem2 // @@ -107,6 +109,7 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) mergedbam = mergedbam.mix(HIC_BWAMEM2.out.mergedbam) + // // LOGIC: PREPARING PRETEXT MAP INPUT // @@ -144,6 +147,7 @@ workflow HIC_MAPPING { ) } + // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR LOW RES // @@ -153,6 +157,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) + // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT // @@ -166,6 +171,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) + // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES // @@ -175,6 +181,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) + // // NOTICE: This could fail on LARGE hires maps due to some memory parameter in the C code // of pretext graph. There is a "fixed" version in sanger /software which may need @@ -190,6 +197,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( PRETEXT_INGEST_HIRES.out.versions ) + // // MODULE: GENERATE PNG FROM STANDARD PRETEXT // @@ -198,6 +206,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix ( SNAPSHOT_SRES.out.versions ) + // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G // @@ -216,6 +225,7 @@ workflow HIC_MAPPING { } .set { ch_merged_bam } + // LOGIC: PREPARE BAMTOBED JUICER INPUT. if ( workflow_setting != "RAPID_TOL" && params.juicer == false ) { // @@ -236,6 +246,7 @@ workflow HIC_MAPPING { } .set { ch_merged_bam } + // // MODULE: SUBSAMPLE BAM // @@ -244,12 +255,14 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) + // // LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT // ch_subsampled_bam = SUBSAMPLE_BAM.out.subsampled_bam ch_subsampled_bam.mix(ch_merged_bam.unmodified) + // // LOGIC: PREPARE BAMTOBED JUICER INPUT // @@ -261,6 +274,7 @@ workflow HIC_MAPPING { } .set { ch_bamtobed_juicer_input } + // // SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM // @@ -270,6 +284,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) + // // LOGIC: PREPARE JUICER TOOLS INPUT // @@ -282,6 +297,7 @@ workflow HIC_MAPPING { } .set { ch_juicer_input } + // // MODULE: GENERATE HIC MAP, ONLY IS PIPELINE IS RUNNING ON ENTRY FULL // @@ -293,6 +309,7 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) } + // // LOGIC: PREPARE BAMTOBED COOLER INPUT // @@ -304,6 +321,7 @@ workflow HIC_MAPPING { } .set { ch_bamtobed_cooler_input } + // // SUBWORKFLOW: BAM TO BED FOR COOLER // @@ -313,6 +331,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions ) + // // LOGIC: BIN CONTACT PAIRS // @@ -321,6 +340,7 @@ workflow HIC_MAPPING { .combine( ch_cool_bin ) .set { ch_binned_pairs } + // // LOGIC: PREPARE COOLER INPUT // @@ -332,6 +352,7 @@ workflow HIC_MAPPING { } .set { ch_cooler } + // // MODULE: GENERATE A MULTI-RESOLUTION COOLER FILE BY COARSENING // @@ -341,6 +362,7 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions) + // // LOGIC: REFACTOR CHANNEL FOR ZOOMIFY // @@ -350,16 +372,17 @@ workflow HIC_MAPPING { } .set{ch_cool} + // // MODULE: ZOOM COOL TO MCOOL // COOLER_ZOOMIFY(ch_cool) ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) + // // LOGIC: FOR REPORTING // - ch_cram_files = GrabFiles( hic_reads_path ) ch_cram_files diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf index 7a2b1c40..23837e48 100755 --- a/subworkflows/local/synteny.nf +++ b/subworkflows/local/synteny.nf @@ -7,8 +7,8 @@ include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/mai workflow SYNTENY { take: - reference_tuple // Channel: tuple [ val(meta), path(file) ] - synteny_paths // Channel: val(meta) + reference_tuple // Channel: tuple [ val(meta), path(file) ] + synteny_paths // Channel: val(meta) main: ch_versions = Channel.empty() diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 69a5cb1f..0ce6ee6c 100755 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -39,8 +39,6 @@ workflow YAML_INPUT { } .set{ group } - group.synteny.view{"INPUT: $it"} - // // LOGIC: PARSES THE SECOND LEVEL OF YAML VALUES PER ABOVE OUTPUT CHANNEL // diff --git a/workflows/treeval.nf b/workflows/treeval.nf index d616f491..6a16ebba 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -199,7 +199,6 @@ workflow TREEVAL { // and generated a file of syntenic blocks. // if ( !exclude_workflow_steps.contains("synteny")) { - YAML_INPUT.out.synteny_paths.view {"SYNTENY_MAIN: $it"} SYNTENY ( YAML_INPUT.out.reference_ch, YAML_INPUT.out.synteny_paths @@ -207,6 +206,7 @@ workflow TREEVAL { ch_versions = ch_versions.mix( SYNTENY.out.versions ) } + // // SUBWORKFLOW: Takes reference, pacbio reads // diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index 43c640e7..cd71851c 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -65,6 +65,8 @@ workflow TREEVAL_RAPID { params.entry = 'RAPID' input_ch = Channel.fromPath(params.input, checkIfExists: true) + + // // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field // @@ -73,6 +75,7 @@ workflow TREEVAL_RAPID { params.entry ) + // // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file // @@ -82,6 +85,7 @@ workflow TREEVAL_RAPID { ) ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions ) + // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // @@ -93,6 +97,7 @@ workflow TREEVAL_RAPID { ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) } + // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // @@ -103,6 +108,7 @@ workflow TREEVAL_RAPID { ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) } + // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // @@ -113,6 +119,7 @@ workflow TREEVAL_RAPID { ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) } + // // SUBWORKFLOW: Takes reference, pacbio reads // @@ -152,6 +159,7 @@ workflow TREEVAL_RAPID { hic_report = [] } + // // SUBWORKFLOW: Collates version data from prior subworflows // @@ -159,6 +167,7 @@ workflow TREEVAL_RAPID { ch_versions.unique().collectFile(name: 'collated_versions.yml') ) + // // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // diff --git a/workflows/treeval_rapid_tol.nf b/workflows/treeval_rapid_tol.nf index 97dd067d..61821587 100755 --- a/workflows/treeval_rapid_tol.nf +++ b/workflows/treeval_rapid_tol.nf @@ -63,6 +63,8 @@ workflow TREEVAL_RAPID_TOL { params.entry = 'RAPID_TOL' input_ch = Channel.fromPath(params.input, checkIfExists: true) + + // // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field // @@ -71,6 +73,7 @@ workflow TREEVAL_RAPID_TOL { params.entry ) + // // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file // @@ -80,6 +83,7 @@ workflow TREEVAL_RAPID_TOL { ) ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions ) + // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // @@ -91,6 +95,7 @@ workflow TREEVAL_RAPID_TOL { ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) } + // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // @@ -101,6 +106,7 @@ workflow TREEVAL_RAPID_TOL { ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) } + // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // @@ -111,6 +117,7 @@ workflow TREEVAL_RAPID_TOL { ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) } + // // SUBWORKFLOW: Takes reference, pacbio reads // @@ -126,6 +133,7 @@ workflow TREEVAL_RAPID_TOL { coverage_report = [] } + // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // @@ -137,6 +145,7 @@ workflow TREEVAL_RAPID_TOL { ch_versions = ch_versions.mix( KMER.out.versions ) } + // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // @@ -160,6 +169,7 @@ workflow TREEVAL_RAPID_TOL { hic_report = [] } + // // SUBWORKFLOW: Collates version data from prior subworflows // @@ -167,6 +177,7 @@ workflow TREEVAL_RAPID_TOL { ch_versions.unique().collectFile(name: 'collated_versions.yml') ) + // // LOGIC: GENERATE SOME CHANNELS FOR REPORTING //