From 5d37756bb84c252318f9f416b6617207d5344f43 Mon Sep 17 00:00:00 2001 From: Ted Laderas Date: Fri, 15 Mar 2024 16:48:53 +0000 Subject: [PATCH] Making comments in code consistent; added a table showing the workflow in `02-workflow-plan.Rmd` --- 01-intro.Rmd | 4 ++-- 02-workflow-plan.Rmd | 13 ++++++++----- 03-first-task.Rmd | 2 +- 04-linear-chain.Rmd | 6 +++--- 05-structs.Rmd | 22 +++++++++++----------- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/01-intro.Rmd b/01-intro.Rmd index 1913057..0a70af8 100644 --- a/01-intro.Rmd +++ b/01-intro.Rmd @@ -111,7 +111,7 @@ task do_something { head ~{fastq} >> output.txt >>> output { - File first_ten_lines = "output.txt" #output variable for task + File first_ten_lines = "output.txt" ## output variable for task } } @@ -129,7 +129,7 @@ workflow my_workflow { } output { - File ten_lines = do_something.first_ten_lines #referring to task output here + File ten_lines = do_something.first_ten_lines ## referring to task output here } } ``` diff --git a/02-workflow-plan.Rmd b/02-workflow-plan.Rmd index 942dd52..fd2af8b 100644 --- a/02-workflow-plan.Rmd +++ b/02-workflow-plan.Rmd @@ -17,11 +17,14 @@ The workflow diagram: The tasks involved: -1. `BwaMem` aligns the samples to the reference genome (hg19). (outputs a `.bam` file) -2. `MarkDuplicates` marks PCR duplicates. (outputs a `.bam` file) -3. `ApplyBaseRecalibrator` perform base quality recalibration. (outputs a `.bam` file) -4. `Mutect2` performs paired somatic mutation calling. (outputs a `.vcf` file) -5. `annovar` annotates the called somatic mutations. 
(outputs a `.vcf` file) +|Task|Function|Inputs|Outputs| +|----|--------|------|-------| +|`BwaMem`|aligns the samples to the reference genome (hg19)|FASTQ (`.fastq`) file|`.bam` file| +|`MarkDuplicates`|marks PCR duplicates|`.bam` file|marked `.bam` file| +|`ApplyBaseRecalibrator`|performs base quality recalibration|marked `.bam` file|`.bam` file| +|`Mutect2`|performs paired somatic mutation calling|`.bam` file|`.vcf` file| +|`annovar`|annotates the called somatic mutations|`.vcf` file with somatic mutations|annotated `.vcf` file| + ## Workflow testing strategy diff --git a/03-first-task.Rmd b/03-first-task.Rmd index 1389563..2bdb32a 100644 --- a/03-first-task.Rmd +++ b/03-first-task.Rmd @@ -235,7 +235,7 @@ task BwaMem { String read_group_id = "ID:" + base_file_name String sample_name = "SM:" + base_file_name String platform = "illumina" - String platform_info = "PL:" + platform # Create the platform information + String platform_info = "PL:" + platform ## Create the platform information command <<< set -eo pipefail diff --git a/04-linear-chain.Rmd b/04-linear-chain.Rmd index 7f8065a..569c039 100644 --- a/04-linear-chain.Rmd +++ b/04-linear-chain.Rmd @@ -298,7 +298,7 @@ task BwaMem { String read_group_id = "ID:" + base_file_name String sample_name = "SM:" + base_file_name String platform = "illumina" - String platform_info = "PL:" + platform # Create the platform information + String platform_info = "PL:" + platform ## Create the platform information command <<< @@ -331,7 +331,7 @@ task BwaMem { } } -# Mark duplicates +## Mark duplicates task MarkDuplicates { input { File input_bam @@ -415,7 +415,7 @@ task ApplyBaseRecalibrator { -R ~{ref_fasta_local} \ - #finds the current sort order of this bam file + ## finds the current sort order of this bam file samtools view -H ~{base_file_name}.recal.bam | grep @SQ | sed 's/@SQ\tSN:\|LN://g' > ~{base_file_name}.sortOrder.txt >>> diff --git a/05-structs.Rmd b/05-structs.Rmd index f25d19a..a921696 100644 --- a/05-structs.Rmd +++ 
b/05-structs.Rmd @@ -26,14 +26,14 @@ struct referenceGenome { workflow minidata_mutation_calling_v1 { input { File sampleFastq - referenceGenome refGenome + referenceGenome refGenome ## our struct ... } - # Map reads to reference + ## Map reads to reference call BwaMem { input: input_fastq = sampleFastq, - refGenome = refGenome + refGenome = refGenome ## our struct } } ``` @@ -81,13 +81,13 @@ task BwaMem { String read_group_id = "ID:" + base_file_name String sample_name = "SM:" + base_file_name String platform = "illumina" - String platform_info = "PL:" + platform # Create the platform information + String platform_info = "PL:" + platform ## Create the platform information command <<< set -eo pipefail - #can we iterate through a struct?? + ## can we iterate through a struct?? mv ~{refGenome.ref_fasta} . mv ~{refGenome.ref_fasta_index} . mv ~{refGenome.ref_dict} . @@ -167,7 +167,7 @@ workflow minidata_mutation_calling_v1 { call BwaMem { input: input_fastq = sampleFastq, - refGenome = refGenome + refGenome = refGenome ## our struct } call MarkDuplicates { @@ -183,7 +183,7 @@ workflow minidata_mutation_calling_v1 { dbSNP_vcf_index = dbSNP_vcf_index, known_indels_sites_VCFs = known_indels_sites_VCFs, known_indels_sites_indices = known_indels_sites_indices, - refGenome = refGenome + refGenome = refGenome ## our struct } call Mutect2TumorOnly { @@ -229,7 +229,7 @@ workflow minidata_mutation_calling_v1 { task BwaMem { input { File input_fastq - referenceGenome refGenome + referenceGenome refGenome ## our struct } String base_file_name = basename(input_fastq, ".fastq") @@ -238,7 +238,7 @@ task BwaMem { String read_group_id = "ID:" + base_file_name String sample_name = "SM:" + base_file_name String platform = "illumina" - String platform_info = "PL:" + platform # Create the platform information + String platform_info = "PL:" + platform ## Create the platform information command <<< @@ -315,7 +315,7 @@ task ApplyBaseRecalibrator { File dbSNP_vcf_index File 
known_indels_sites_VCFs File known_indels_sites_indices - referenceGenome refGenome + referenceGenome refGenome ## our struct } String base_file_name = basename(input_bam, ".duplicates_marked.bam") @@ -357,7 +357,7 @@ task ApplyBaseRecalibrator { -R ~{ref_fasta_local} \ - #finds the current sort order of this bam file + ## finds the current sort order of this bam file samtools view -H ~{base_file_name}.recal.bam | grep @SQ | sed 's/@SQ\tSN:\|LN://g' > ~{base_file_name}.sortOrder.txt >>>