nextflow.config

/*
 * -------------------------------------------------
 *  nf-core/scatacseqflow Nextflow config file
 * -------------------------------------------------
 * Default config options for all environments.
 */

// Global default params, used in configs
params {
  // Pipeline-wide default parameters. Values set here can be overridden by
  // profiles, by included config files, or on the command line.

//////// BELOW ARE THE PREPROCESS PARAMETERS ////////

//// Basic settings:
  input_fastq         = false // path to samplesheet.csv that contains the fastq file locations
  preprocess          = false // choose from 'default', '10xgenomics', and 'chromap'
  support_genome      = false // if true, print supported genomes and exit
  species_latin_name  = false // used to look up gene symbols via bioMart when they are not present in the GTF file
  outlier_further     = true  // set to false to stop immediately after the embedding step for outlier detection

//// Reference settings: 'default', '10xgenomics', 'chromap'
  ref_fasta           = false // path to reference fasta file, must be in .gz format; also used by 10xgenomics
  ref_gtf             = false // path to gtf file
  ref_fasta_ucsc      = false // if set, will download ucsc genome: 'hg19', 'hg38', 'mm10', etc.
  ref_fasta_ensembl   = false // if set, will download ensembl genome: 'homo_sapiens', etc.

//// PREPROCESS OPTION: 'default' ////
  split_fastq         = false  // whether or not to split fastq into chunks for more parallel jobs
  ref_bwa_index       = false // path to the bwa index folder
  barcode_correction  = 'pheniqs' // correction method: 'pheniqs' or 'naive'; set to false to skip correction; also applied to the chromap PREPROCESS option (for chromap, choose from true or false)
  whitelist_barcode   = 'assets/whitelist_barcodes' // path to whitelist folder; also applied to the chromap PREPROCESS option
  read1_adapter       = 'AGATCGGAAGAGC' // for trimming, defaults to the first 13 bp of Illumina standard adapters
  read2_adapter       = 'AGATCGGAAGAGC'
  mapper              = 'bwa'
  filter              = 'both' // choose from false (no bam filtering will be performed), 'improper' (reads with low mapping quality, extreme fragment size (outside of 38 - 2000bp), etc. will be filtered out), and 'both' ('improper' + mitochondrial reads will be filtered out)

//// PREPROCESS OPTION: '10xgenomics' ////
  ref_cellranger_index = false // path to cellranger index folder

//// PREPROCESS OPTION: 'chromap' ////
  ref_chromap_index   = false // note that the indexing parameters such as -k, -w and --min-frag-length can't be changed during mapping.


//////// BELOW ARE THE DOWNSTREAM_ARCHR PARAMETERS ////////

//// Doublet filtering algorithm:
  doublet_removal_algorithm   = 'archr' // choose from "archr", "amulet", or false (skip doublet filtering)
  // if "amulet", also need to configure conf/modules.config -> amulet_detect_doublets.args and the following:
  amulet_rmsk_bed             = false // path to known repeat regions in the genome
  amulet_autosomes            = false // path to txt file containing autosomes, example: assets/homo_sapiens_autosomes.txt
    // dev notes: to use relative paths, the params must not be passed as module parameters

//// ARCHR BASIC PARAMETER ////
  input_fragment       = false // path to samplesheet.csv containing the fragment file locations
  archr_thread         = 8
  // archr_beforescript   = 'export HDF5_USE_FILE_LOCKING=FALSE ; export RHDF5_USE_FILE_LOCKING=FALSE' // to prevent issues when multithreading; use env instead since beforeScript executes outside of the container

//// ARCHR GENOME ////
  archr_genome        = false // prebuilt ArchR genome: choose from 'hg19', 'mm10', etc.
  archr_genome_fasta  = false // path to genome fasta used for ArchR
  // archr_gtf           = false // gtf used for ArchR, compared to ref_gtf, which is only for PREPROCESS indexing
  // a single ref_gtf is used instead
  archr_txdb          = false // Bioconductor TxDb name
  archr_org           = false // Bioconductor OrgDb name
  archr_bsgenome      = false // Bioconductor BSgenome name
  archr_blacklist     = false // path to blacklist bed file, used for building ArchR custom genome file

//// ARCHR OTHER PARAMETERS ////
  archr_batch_correction_harmony = true // choose from true or false
  // Samples to exclude from downstream analysis; refer to the header line of archr_clustering/Cluster_xxx_matrix.csv for valid sample names
  filter_sample       = false // defaults to false, meaning that no sample will be excluded
    // example: filter_sample = 'PBMC_1K_N, PBMC_5K_V'

  // Clusters to exclude from downstream analysis; refer to archr_clustering/Cluster_xxx_matrix.csv for valid cluster names
  filter_seurat_ilsi    = false
  filter_seurat_harmony = false
    // example: filter_seurat_harmony = 'C1, C2'

  custom_peaks          = false // for motif enrichment/deviation module
    // example: custom_peaks = 'Encode_K562_GATA1 = "https://www.encodeproject.org/files/ENCFF632NQI/@@download/ENCFF632NQI.bed.gz", Encode_GM12878_CEBPB = "https://www.encodeproject.org/files/ENCFF761MGJ/@@download/ENCFF761MGJ.bed.gz", Encode_K562_Ebf1 = "https://www.encodeproject.org/files/ENCFF868VSY/@@download/ENCFF868VSY.bed.gz", Encode_K562_Pax5 = "https://www.encodeproject.org/files/ENCFF339KUO/@@download/ENCFF339KUO.bed.gz"'

  archr_scrnaseq            = false // path to RNAseq Seurat object
  archr_scrnaseq_grouplist  = false // scRNAseq cluster grouping information for constrained integration.
    // example: see conf/test.config

//////// OTHER PARAMETERS ////////

  // MultiQC options
  multiqc_config             = false
  // multiqc_title              = String [todo]
  max_multiqc_email_size     = 25.MB

  // Boilerplate options
  outdir                     = './results'
  // tracedir is derived from outdir as it is set at this point in the file
  tracedir                   = "${params.outdir}/pipeline_info"
  publish_dir_mode           = 'symlink'
  // email                      = false
  email_on_fail              = false
  plaintext_email            = false
  monochrome_logs            = false
  help                       = false
  validate_params            = true
  show_hidden_params         = false
  schema_ignore_params       = 'genomes,modules'
  enable_conda               = false
  singularity_pull_docker_container = false

  // Config options
  custom_config_version      = 'master'
  // base URL from which nfcore_custom.config is included further down in this file
  custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
  hostnames                  = [:]
  config_profile_description = false
  config_profile_contact     = false
  config_profile_url         = false
  config_profile_name        = false

  // Max resource options: upper limits applied by check_max() at the end of this file
  max_memory                 = '384.GB'
  max_cpus                   = 64
  max_time                   = '240.h'
}

// Load base.config by default for all pipelines.
// These includes come after the params block above and before the profiles block below.
includeConfig 'conf/base.config'

// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'

// Load nf-core custom profiles from different Institutions.
// The file is addressed through params.custom_config_base; if the include throws
// (for example because the location cannot be reached), a warning is printed to
// stderr and config loading continues without those profiles.
try {
  includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
  System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}

// Execution profiles, selected on the command line with `-profile <name>[,<name>...]`.
// Each container profile enables exactly one engine and explicitly disables the others.
profiles {
  // Small-machine profile: lowers the memory ceiling used by check_max()
  local {
    params {
      max_memory = 8.GB
    }
  }
  // Print the host name before each task script
  debug { process.beforeScript = 'echo $HOSTNAME' }
  conda {
    params.enable_conda    = true
    docker.enabled         = false
    singularity.enabled    = false
    podman.enabled         = false
    shifter.enabled        = false
    charliecloud.enabled   = false
  }
  docker {
    docker.enabled         = true
    // Avoid this error:
    //   WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
    // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351
    // once this is established and works well, nextflow might implement this behavior as new default.
    // docker.runOptions      = '-u \$(id -u):\$(id -g)'
    docker.runOptions      = '-u \$(id -u):\$(id -g) --rm -v /Users:/Users -v /tmp:/tmp'
    // note that /tmp is for R to create and save R_tempDir/Rtmpxxxx: https://support.microsoft.com/en-us/topic/error-fatal-error-cannot-create-r-tempdir-21ae3934-3d91-f6dd-8752-3b3f7b4dbc40
    singularity.enabled    = false
    podman.enabled         = false
    shifter.enabled        = false
    charliecloud.enabled   = false
  }
  singularity {
    singularity.enabled    = true
    singularity.autoMounts = true
    singularity.pullTimeout = '100 min'
    docker.enabled         = false
    podman.enabled         = false
    shifter.enabled        = false
    charliecloud.enabled   = false
    // The image cache location must be set inside the `singularity` scope; a bare
    // `cacheDir` assignment is not a Singularity setting and had no effect.
    // launchDir keeps the path absolute, so it does not depend on the directory
    // a task happens to run in.
    singularity.cacheDir   = "${launchDir}/work/singularity"
    singularity.runOptions = '--bind /home:/home' // for test in hpcc
  }
  podman {
    podman.enabled         = true
    docker.enabled         = false
    singularity.enabled    = false
    shifter.enabled        = false
    charliecloud.enabled   = false
  }
  shifter {
    shifter.enabled        = true
    docker.enabled         = false
    singularity.enabled    = false
    podman.enabled         = false
    charliecloud.enabled   = false
  }
  charliecloud {
    charliecloud.enabled   = true
    docker.enabled         = false
    singularity.enabled    = false
    podman.enabled         = false
    shifter.enabled        = false
  }
  // Generic LSF cluster: submit every process to the 'long' queue
  lsf {
    process {
      executor = 'lsf'
      queue = 'long'
    }
  }
  // LSF profile for the ghpcc cluster: same as 'lsf' plus a host selection string
  lsf_ghpcc {
    process {
      executor = 'lsf'
      clusterOptions = '-R "select[rh=8]"'
      // not work: jobName = { "${task.process.substring(0,200)}" } // https://nfcore.slack.com/archives/CE6SDBX2A/p1632865315466900
      // clusterOptions = '-R "select[rh=6]" -n 1' // will be two lines in .command.run automatically
      queue = 'long'
    }
  }
  test      { includeConfig 'conf/test.config'      }
  test_full { includeConfig 'conf/test_full.config' }
}

// Environment exported to every task so that Python/R libraries installed on the
// host do not conflict with the ones shipped inside the container.
env.PYTHONNOUSERSITE = 1
env.R_PROFILE_USER   = "/.Rprofile"
env.R_ENVIRON_USER   = "/.Renviron"

// Run task scripts with bash in strict mode so that a failure anywhere in a
// pipe is reported as the exit code of the task.
process {
  shell = ['/bin/bash', '-euo', 'pipefail']
}

// Execution reports: every run writes its own timestamped timeline, report,
// trace and DAG file into params.tracedir.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
def trace_file = { String stem, String ext -> "${params.tracedir}/${stem}_${trace_timestamp}.${ext}" }

timeline.enabled = true
timeline.file    = trace_file('execution_timeline', 'html')

report.enabled   = true
report.file      = trace_file('execution_report', 'html')

trace.enabled    = true
trace.file       = trace_file('execution_trace', 'txt')

dag.enabled      = true
dag.file         = trace_file('pipeline_dag', 'svg')

// Pipeline metadata: identity, entry script and the Nextflow version requirement
manifest.name            = 'scATACpipe'
manifest.author          = 'Kai Hu, Haibo, Julie Lihua Zhu'
manifest.homePage        = 'https://github.com/hukai916/scATACpipe'
manifest.description     = 'A Nextflow pipeline for scATAC-seq data analysis.'
manifest.mainScript      = 'main.nf'
manifest.nextflowVersion = '!>=21.06.0-edge'
manifest.version         = '0.1.0dev'

/**
 * Ensure that a resource request does not exceed the configured maximum.
 *
 * @param obj  the requested amount: a memory value for 'memory', a duration for
 *             'time', or a number for 'cpus'
 * @param type which limit to apply: 'memory' (params.max_memory),
 *             'time' (params.max_time) or 'cpus' (params.max_cpus)
 * @return the request itself when it is within the limit, otherwise the limit.
 *         If the configured limit cannot be converted or compared, an error
 *         line is printed and the request is returned unchanged. A request with
 *         an unrecognised type is also returned unchanged.
 */
def check_max(obj, type) {
  if (type == 'memory') {
    try {
      def max_memory = params.max_memory as nextflow.util.MemoryUnit
      // compareTo() only promises the sign of its result, so test for "> 0"
      // instead of relying on it being exactly 1
      return obj.compareTo(max_memory) > 0 ? max_memory : obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
      return obj
    }
  }
  if (type == 'time') {
    try {
      def max_time = params.max_time as nextflow.util.Duration
      return obj.compareTo(max_time) > 0 ? max_time : obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
      return obj
    }
  }
  if (type == 'cpus') {
    try {
      return Math.min( obj, params.max_cpus as int )
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
      return obj
    }
  }
  // Unknown resource type: hand the request back untouched rather than
  // silently turning it into null
  return obj
}