From 4528288603277a9649c3bef050b0ca0e3140696f Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sun, 2 Apr 2023 03:24:51 +0200
Subject: [PATCH 01/15] add cadd snv

---
 conf/modules/annotate_cadd.config   | 28 +++++++++++++++
 conf/test.config                    |  7 ++--
 main.nf                             |  1 +
 modules/local/cadd.nf               | 55 +++++++++++++++++++++++++++++
 nextflow.config                     |  1 +
 nextflow_schema.json                |  7 ++++
 subworkflows/local/annotate_cadd.nf | 26 ++++++++++++++
 subworkflows/local/annotate_snvs.nf | 20 +++++++----
 workflows/raredisease.nf            |  4 +++
 9 files changed, 140 insertions(+), 9 deletions(-)
 create mode 100644 conf/modules/annotate_cadd.config
 create mode 100644 modules/local/cadd.nf
 create mode 100644 subworkflows/local/annotate_cadd.nf

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
new file mode 100644
index 00000000..c6827584
--- /dev/null
+++ b/conf/modules/annotate_cadd.config
@@ -0,0 +1,28 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args            = Additional arguments appended to command in module.
+        ext.args2           = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3           = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix          = File name prefix for output files.
+        ext.when            = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// CADD annotation
+//
+
+process {
+    withName: '.*ANNOTATE_SNVS:ANNOTATE_CADD:BCFTOOLS_VIEW' {
+        ext.args   = { "--output-type z --types indels" }
+        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_indels" }
+    }
+
+    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:CADD' {
+        ext.when = { !(workflow.profile.contains("test")) }
+        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
+    }
+}
diff --git a/conf/test.config b/conf/test.config
index 5fb4c9e8..20a18a2c 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -18,7 +18,10 @@ params {
     max_cpus       = 2
     max_memory     = '6.GB'
     max_time       = '2.h'
-    mt_contig_name = 'MT'
+
+    // reference params
+    igenomes_ignore = true
+    mt_contig_name  = 'MT'
 
     // Input data
     input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv'
@@ -55,5 +58,3 @@ process {
         ext.when = { workflow.stubRun }
     }
 }
-
-
diff --git a/main.nf b/main.nf
index 64368429..fc1c6ca4 100644
--- a/main.nf
+++ b/main.nf
@@ -23,6 +23,7 @@ params.fasta_fai                      = WorkflowMain.getGenomeAttribute(params,
 params.bwa                            = WorkflowMain.getGenomeAttribute(params, 'bwa')
 params.bwamem2                        = WorkflowMain.getGenomeAttribute(params, 'bwamem2')
 params.call_interval                  = WorkflowMain.getGenomeAttribute(params, 'call_interval')
+params.cadd_annotation                = WorkflowMain.getGenomeAttribute(params, 'cadd_annotation')
 params.gnomad_af                      = WorkflowMain.getGenomeAttribute(params, 'gnomad_af')
 params.gnomad_af_idx                  = WorkflowMain.getGenomeAttribute(params, 'gnomad_af_idx')
 params.intervals_wgs                  = WorkflowMain.getGenomeAttribute(params, 'intervals_wgs')
diff --git a/modules/local/cadd.nf b/modules/local/cadd.nf
new file mode 100644
index 00000000..ce1e369d
--- /dev/null
+++ b/modules/local/cadd.nf
@@ -0,0 +1,55 @@
+process CADD {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::cadd-scripts=1.6 anaconda::conda=4.14.0 conda-forge::mamba=1.4.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0':
+        'quay.io/biocontainers/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0' }"
+
+    containerOptions {
+        (workflow.containerEngine == 'singularity') ?
+            "--writable -B ${annotation_dir}:/usr/local/share/cadd-scripts-1.6-1/data/annotations" :
+            "--privileged -v ${annotation_dir}:/usr/local/share/cadd-scripts-1.6-1/data/annotations"
+        }
+
+    input:
+    tuple val(meta), path(vcf)
+    path(annotation_dir)
+
+    output:
+    tuple val(meta), path("*.tsv.gz"), emit: tsv
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.6" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    """
+    cadd.sh \\
+        -o ${prefix}.tsv.gz \\
+        $args \\
+        $vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cadd: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = "1.6" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    """
+    touch ${prefix}.tsv.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cadd: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index c0cbfbac..39ba2245 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -265,6 +265,7 @@ includeConfig 'conf/modules/prepare_references.config'
 includeConfig 'conf/modules/qc_bam.config'
 includeConfig 'conf/modules/rank_variants.config'
 includeConfig 'conf/modules/scatter_genome.config'
+includeConfig 'conf/modules/annotate_cadd.config'
 
 // Function to ensure that resource requirements don't go beyond
 // a maximum limit
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9cebc236..75a4ccc6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -103,6 +103,13 @@
                     "fa_icon": "fas fa-folder-open",
                     "hidden": true
                 },
+                "cadd_annotation": {
+                    "type": "string",
+                    "format": "directory-path",
+                    "fa_icon": "fas fa-file",
+                    "description": "Path to the directory containing cadd annotations.",
+                    "help_text": "This folder contains the uncompressed files that would otherwise be in the data/annotations folder, as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
+                },
                 "gnomad_af": {
                     "type": "string",
                     "format": "path",
diff --git a/subworkflows/local/annotate_cadd.nf b/subworkflows/local/annotate_cadd.nf
new file mode 100644
index 00000000..2f332bcd
--- /dev/null
+++ b/subworkflows/local/annotate_cadd.nf
@@ -0,0 +1,26 @@
+//
+// A subworkflow to annotate snvs
+//
+
+include { BCFTOOLS_VIEW } from '../../modules/nf-core/bcftools/view/main'
+include { CADD          } from '../../modules/local/cadd'
+
+workflow ANNOTATE_CADD {
+
+    take:
+        ch_vcf         // channel: [mandatory] [ val(meta), path(vcfs) ]
+        ch_index       // channel: [mandatory] [ val(meta), path(tbis) ]
+        ch_cadd_scores // channel: [mandatory] [ val(meta), path(dir) ]
+
+    main:
+        ch_versions       = Channel.empty()
+
+        BCFTOOLS_VIEW(ch_vcf.join(ch_index),[],[],[])
+
+        CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_scores)
+
+    emit:
+        vcf_ann  = Channel.empty()   // channel: [ val(meta), path(vcf) ]
+        // tbi      = ch_vep_index // channel: [ val(meta), path(tbi) ]
+        // versions = ch_versions  // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index cfbb4b04..9fca95f5 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -14,12 +14,14 @@ include { TABIX_TABIX as TABIX_VEP              } from '../../modules/nf-core/ta
 include { TABIX_TABIX as TABIX_BCFTOOLS_CONCAT  } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW    } from '../../modules/nf-core/tabix/tabix/main'
 include { GATK4_SELECTVARIANTS                  } from '../../modules/nf-core/gatk4/selectvariants/main'
+include { ANNOTATE_CADD                         } from '../../subworkflows/local/annotate_cadd'
 
 workflow ANNOTATE_SNVS {
 
     take:
         ch_vcf                // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
         analysis_type         // string: [mandatory] 'wgs' or 'wes'
+        ch_cadd_scores        // channel: [mandatory] [ path(annotation) ]
         ch_vcfanno_resources  // channel: [mandatory] [ path(resources) ]
         ch_vcfanno_lua        // channel: [mandatory] [ path(lua) ]
         ch_vcfanno_toml       // channel: [mandatory] [ path(toml) ]
@@ -68,10 +70,16 @@ workflow ANNOTATE_SNVS {
 
         BCFTOOLS_VIEW.out.vcf.join(TABIX_BCFTOOLS_VIEW.out.tbi).collect().set { ch_vcf_scatter_in }
 
-        GATK4_SELECTVARIANTS (ch_vcf_scatter_in.combine(ch_split_intervals)).vcf.set { ch_vep_in }
+        GATK4_SELECTVARIANTS (ch_vcf_scatter_in.combine(ch_split_intervals))
+
+        ANNOTATE_CADD (
+            GATK4_SELECTVARIANTS.out.vcf,
+            GATK4_SELECTVARIANTS.out.tbi,
+            ch_cadd_scores
+        )
 
         ENSEMBLVEP_SNV(
-            ch_vep_in,
+            GATK4_SELECTVARIANTS.out.vcf,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
@@ -82,13 +90,13 @@ workflow ANNOTATE_SNVS {
 
         TABIX_VEP (ENSEMBLVEP_SNV.out.vcf_gz)
 
-        ch_vep_ann   = ENSEMBLVEP_SNV.out.vcf_gz
-        ch_vep_index = TABIX_VEP.out.tbi
+        ch_vep_ann       = ENSEMBLVEP_SNV.out.vcf_gz
+        ch_vep_index     = TABIX_VEP.out.tbi
 
         if (params.analysis_type == 'wgs') {
 
-            ENSEMBLVEP_SNV.out.vcf_gz
-                .join(TABIX_VEP.out.tbi)
+            ch_vep_ann
+                .join(ch_vep_index)
                 .groupTuple()
                 .map { meta, vcfs, tbis ->
                     def sortedvcfs = vcfs.sort { it.baseName }
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index eb917010..ea6039a3 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -14,6 +14,7 @@ def checkPathParamList = [
     params.bwa,
     params.bwamem2,
     params.call_interval,
+    params.cadd_annotation,
     params.fasta,
     params.fasta_fai,
     params.gens_gnomad_pos,
@@ -140,6 +141,8 @@ workflow RAREDISEASE {
     }
 
     // Initialize all file channels including unprocessed vcf, bed and tab files
+    ch_cadd_scores                    = params.cadd_annotation                ? Channel.fromPath(params.cadd_annotation).collect()
+                                                                              : Channel.value([])
     ch_call_interval                  = params.call_interval                  ? Channel.fromPath(params.call_interval).collect()
                                                                               : Channel.value([])
     ch_genome_fasta_no_meta           = params.fasta                          ? Channel.fromPath(params.fasta).collect()
@@ -448,6 +451,7 @@ workflow RAREDISEASE {
         ANNOTATE_SNVS (
             ch_vcf,
             params.analysis_type,
+            ch_cadd_scores,
             ch_vcfanno_resources,
             ch_vcfanno_lua,
             ch_vcfanno_toml,

From e1fbab6b6f2a77f5e4cd4eeaa244d421079c974f Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Sun, 2 Apr 2023 23:32:10 +0200
Subject: [PATCH 02/15] add cadd subworkflow

---
 assets/cadd_to_vcf_header_-1.0-.txt |  1 +
 conf/modules/annotate_cadd.config   | 16 +++++++--
 conf/modules/scatter_genome.config  |  4 +--
 subworkflows/local/annotate_cadd.nf | 41 +++++++++++++++++++----
 subworkflows/local/annotate_snvs.nf | 50 +++++++++++++++++------------
 workflows/raredisease.nf            |  2 ++
 6 files changed, 82 insertions(+), 32 deletions(-)
 create mode 100644 assets/cadd_to_vcf_header_-1.0-.txt

diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt
new file mode 100644
index 00000000..5d244377
--- /dev/null
+++ b/assets/cadd_to_vcf_header_-1.0-.txt
@@ -0,0 +1 @@
+##INFO=<ID=CADD,Number=1,Type=Float,Description="The CADD relative score for this alternative.">
diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index c6827584..de1fd8ce 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -12,7 +12,7 @@
 */
 
 //
-// CADD annotation
+// CADD annotation         ext.when = { !(workflow.profile.contains("test")) }
 //
 
 process {
@@ -22,7 +22,19 @@ process {
     }
 
     withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:CADD' {
-        ext.when = { !(workflow.profile.contains("test")) }
         ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
     }
+
+    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:CADD' {
+        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
+    }
+
+    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:TABIX_CADD' {
+        ext.args = { "--force --sequence 1 --begin 2 --end 2" }
+    }
+
+    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' {
+        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD" }
+        ext.prefix = { "${input.simpleName}_rohann_vcfanno_filter_cadd" }
+    }
 }
diff --git a/conf/modules/scatter_genome.config b/conf/modules/scatter_genome.config
index f8ecf792..b2fe363a 100644
--- a/conf/modules/scatter_genome.config
+++ b/conf/modules/scatter_genome.config
@@ -17,12 +17,12 @@
 
 process {
     withName: '.*SCATTER_GENOME:BUILD_BED' {
-        ext.when = { !params.skip_snv_annotation && !(params.analysis_type == "wes")}
+        ext.when = { !params.skip_snv_annotation }
     }
 
     withName: '.*SCATTER_GENOME:GATK4_SPLITINTERVALS' {
         ext.args = { "--subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION --scatter-count 22" }
-        ext.when = { !params.skip_snv_annotation && !(params.analysis_type == "wes")}
+        ext.when = { !params.skip_snv_annotation }
         ext.prefix = { "${meta.id}_genome_intervals" }
         publishDir = [
             enabled: params.save_reference,
diff --git a/subworkflows/local/annotate_cadd.nf b/subworkflows/local/annotate_cadd.nf
index 2f332bcd..6bf1b533 100644
--- a/subworkflows/local/annotate_cadd.nf
+++ b/subworkflows/local/annotate_cadd.nf
@@ -2,25 +2,52 @@
 // A subworkflow to annotate snvs
 //
 
-include { BCFTOOLS_VIEW } from '../../modules/nf-core/bcftools/view/main'
-include { CADD          } from '../../modules/local/cadd'
+include { BCFTOOLS_ANNOTATE             } from '../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_VIEW                 } from '../../modules/nf-core/bcftools/view/main'
+include { CADD                          } from '../../modules/local/cadd'
+include { TABIX_TABIX as TABIX_ANNOTATE } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_CADD     } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_VIEW     } from '../../modules/nf-core/tabix/tabix/main'
 
 workflow ANNOTATE_CADD {
 
     take:
         ch_vcf         // channel: [mandatory] [ val(meta), path(vcfs) ]
         ch_index       // channel: [mandatory] [ val(meta), path(tbis) ]
-        ch_cadd_scores // channel: [mandatory] [ val(meta), path(dir) ]
+        ch_header      // channel: [mandatory] [ path(txt) ]
+        ch_cadd_scores // channel: [mandatory] [ path(dir) ]
 
     main:
         ch_versions       = Channel.empty()
 
-        BCFTOOLS_VIEW(ch_vcf.join(ch_index),[],[],[])
+        BCFTOOLS_VIEW(ch_vcf.join(ch_index), [], [], [])
+
+        TABIX_VIEW(BCFTOOLS_VIEW.out.vcf)
 
         CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_scores)
 
+        TABIX_CADD(CADD.out.tsv)
+
+        BCFTOOLS_VIEW.out.vcf
+            .join(TABIX_VIEW.out.tbi)
+            .join(CADD.out.tsv)
+            .join(TABIX_CADD.out.tbi)
+            .groupTuple()
+            .set { ch_annotate_in }
+
+        BCFTOOLS_ANNOTATE(ch_annotate_in, ch_header)
+
+        TABIX_ANNOTATE (BCFTOOLS_ANNOTATE.out.vcf)
+
+        ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_VIEW.out.versions.first())
+        ch_versions = ch_versions.mix(CADD.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_CADD.out.versions.first())
+        ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions.first())
+
     emit:
-        vcf_ann  = Channel.empty()   // channel: [ val(meta), path(vcf) ]
-        // tbi      = ch_vep_index // channel: [ val(meta), path(tbi) ]
-        // versions = ch_versions  // channel: [ path(versions.yml) ]
+        vcf  = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi  = TABIX_ANNOTATE.out.tbi    // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions           // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index 9fca95f5..ad05012b 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -21,6 +21,7 @@ workflow ANNOTATE_SNVS {
     take:
         ch_vcf                // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
         analysis_type         // string: [mandatory] 'wgs' or 'wes'
+        ch_cadd_header        // channel: [mandatory] [ path(txt) ]
         ch_cadd_scores        // channel: [mandatory] [ path(annotation) ]
         ch_vcfanno_resources  // channel: [mandatory] [ path(resources) ]
         ch_vcfanno_lua        // channel: [mandatory] [ path(lua) ]
@@ -68,18 +69,26 @@ workflow ANNOTATE_SNVS {
 
         TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf)
 
-        BCFTOOLS_VIEW.out.vcf.join(TABIX_BCFTOOLS_VIEW.out.tbi).collect().set { ch_vcf_scatter_in }
+        BCFTOOLS_VIEW.out.vcf.join(TABIX_BCFTOOLS_VIEW.out.tbi)
+            .collect()
+            .combine(ch_split_intervals)
+            .map {
+                meta, vcf, tbi, interval ->
+                return [meta + [scatterid:interval.baseName], vcf, tbi, interval]
+            }
+            .set { ch_vcf_scatter_in }
 
-        GATK4_SELECTVARIANTS (ch_vcf_scatter_in.combine(ch_split_intervals))
+        GATK4_SELECTVARIANTS (ch_vcf_scatter_in)
 
         ANNOTATE_CADD (
             GATK4_SELECTVARIANTS.out.vcf,
             GATK4_SELECTVARIANTS.out.tbi,
+            ch_cadd_header,
             ch_cadd_scores
         )
 
         ENSEMBLVEP_SNV(
-            GATK4_SELECTVARIANTS.out.vcf,
+            ANNOTATE_CADD.out.vcf,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
@@ -88,30 +97,29 @@ workflow ANNOTATE_SNVS {
             []
         )
 
-        TABIX_VEP (ENSEMBLVEP_SNV.out.vcf_gz)
+        ENSEMBLVEP_SNV.out.vcf_gz
+            .map { meta, vcf -> [meta - meta.subMap('scatterid'), vcf] }
+            .set { ch_vep_out }
 
-        ch_vep_ann       = ENSEMBLVEP_SNV.out.vcf_gz
-        ch_vep_index     = TABIX_VEP.out.tbi
+        TABIX_VEP (ch_vep_out)
 
-        if (params.analysis_type == 'wgs') {
+        ch_vep_out
+            .join(TABIX_VEP.out.tbi)
+            .groupTuple()
+            .map { meta, vcfs, tbis ->
+                def sortedvcfs = vcfs.sort { it.baseName }
+                def sortedtbis = tbis.sort { it.baseName }
+                return [ meta, sortedvcfs, sortedtbis ]
+            }
+            .set { ch_concat_in }
 
-            ch_vep_ann
-                .join(ch_vep_index)
-                .groupTuple()
-                .map { meta, vcfs, tbis ->
-                    def sortedvcfs = vcfs.sort { it.baseName }
-                    def sortedtbis = tbis.sort { it.baseName }
-                    return [ meta, sortedvcfs, sortedtbis ]
-                }
-                .set { ch_concat_in }
+        BCFTOOLS_CONCAT (ch_concat_in)
 
-            BCFTOOLS_CONCAT (ch_concat_in)
+        TABIX_BCFTOOLS_CONCAT (BCFTOOLS_CONCAT.out.vcf)
 
-            TABIX_BCFTOOLS_CONCAT (BCFTOOLS_CONCAT.out.vcf)
+        ch_vep_ann   = BCFTOOLS_CONCAT.out.vcf
+        ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi
 
-            ch_vep_ann   = BCFTOOLS_CONCAT.out.vcf
-            ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi
-        }
         ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions)
         ch_versions = ch_versions.mix(RHOCALL_ANNOTATE.out.versions)
         ch_versions = ch_versions.mix(ZIP_TABIX_ROHCALL.out.versions)
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index ea6039a3..755cad42 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -141,6 +141,7 @@ workflow RAREDISEASE {
     }
 
     // Initialize all file channels including unprocessed vcf, bed and tab files
+    ch_cadd_header                    = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
     ch_cadd_scores                    = params.cadd_annotation                ? Channel.fromPath(params.cadd_annotation).collect()
                                                                               : Channel.value([])
     ch_call_interval                  = params.call_interval                  ? Channel.fromPath(params.call_interval).collect()
@@ -451,6 +452,7 @@ workflow RAREDISEASE {
         ANNOTATE_SNVS (
             ch_vcf,
             params.analysis_type,
+            ch_cadd_header,
             ch_cadd_scores,
             ch_vcfanno_resources,
             ch_vcfanno_lua,

From d6cdfd43c9121db4afd6f7ddd19c514fa199d7a1 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 3 Apr 2023 15:45:31 +0200
Subject: [PATCH 03/15] add bcftools annotate

---
 modules.json                               |  5 ++
 modules/nf-core/bcftools/annotate/main.nf  | 64 ++++++++++++++++++++++
 modules/nf-core/bcftools/annotate/meta.yml | 56 +++++++++++++++++++
 3 files changed, 125 insertions(+)
 create mode 100644 modules/nf-core/bcftools/annotate/main.nf
 create mode 100644 modules/nf-core/bcftools/annotate/meta.yml

diff --git a/modules.json b/modules.json
index fa00b5f6..092d75a1 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "bcftools/annotate": {
+                        "branch": "master",
+                        "git_sha": "00567d35852dfde7e30a707b8d2e415dfa9d5970",
+                        "installed_by": ["modules"]
+                    },
                     "bcftools/concat": {
                         "branch": "master",
                         "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec",
diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf
new file mode 100644
index 00000000..c529f2c2
--- /dev/null
+++ b/modules/nf-core/bcftools/annotate/main.nf
@@ -0,0 +1,64 @@
+process BCFTOOLS_ANNOTATE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::bcftools=1.16"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+        'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
+
+    input:
+    tuple val(meta), path(input), path(index), path(annotations), path(annotations_index)
+    path(header_lines)
+
+    output:
+    tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
+    path "versions.yml"                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args    = task.ext.args ?: ''
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    def header_file = header_lines ? "--header-lines ${header_lines}" : ''
+    def annotations_file = annotations ? "--annotations ${annotations}" : ''
+    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                    "vcf"
+    if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    """
+    bcftools \\
+        annotate \\
+        $args \\
+        $annotations_file \\
+        $header_file \\
+        --output ${prefix}.${extension} \\
+        --threads $task.cpus \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                    "vcf"
+    """
+    touch ${prefix}.${extension}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml
new file mode 100644
index 00000000..60f053ea
--- /dev/null
+++ b/modules/nf-core/bcftools/annotate/meta.yml
@@ -0,0 +1,56 @@
+name: bcftools_annotate
+description: Add or remove annotations.
+keywords:
+  - bcftools
+  - annotate
+  - vcf
+  - remove
+  - add
+tools:
+  - annotate:
+      description: Add or remove annotations.
+      homepage: http://samtools.github.io/bcftools/bcftools.html
+      documentation: https://samtools.github.io/bcftools/bcftools.html#annotate
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: Query VCF or BCF file, can be either uncompressed or compressed
+  - index:
+      type: file
+      description: Index of the query VCF or BCF file
+  - annotations:
+      type: file
+      description: Bgzip-compressed file with annotations
+  - annotations_index:
+      type: file
+      description: Index of the annotations file
+  - header_lines:
+      type: file
+      description: Contains lines to append to the output VCF header
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf:
+      type: file
+      description: Compressed annotated VCF file
+      pattern: "*{vcf,vcf.gz,bcf,bcf.gz}"
+
+authors:
+  - "@projectoriented"
+  - "@ramprasadn"

From 364a25db3f6938adf60899b80dac7925bd614214 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 3 Apr 2023 15:51:03 +0200
Subject: [PATCH 04/15] update cadd subworkflow

---
 subworkflows/local/annotate_snvs.nf                  |  2 +-
 subworkflows/local/{ => annotation}/annotate_cadd.nf | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)
 rename subworkflows/local/{ => annotation}/annotate_cadd.nf (74%)

diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index ad05012b..e3c4d55f 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -14,7 +14,7 @@ include { TABIX_TABIX as TABIX_VEP              } from '../../modules/nf-core/ta
 include { TABIX_TABIX as TABIX_BCFTOOLS_CONCAT  } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW    } from '../../modules/nf-core/tabix/tabix/main'
 include { GATK4_SELECTVARIANTS                  } from '../../modules/nf-core/gatk4/selectvariants/main'
-include { ANNOTATE_CADD                         } from '../../subworkflows/local/annotate_cadd'
+include { ANNOTATE_CADD                         } from './annotation/annotate_cadd'
 
 workflow ANNOTATE_SNVS {
 
diff --git a/subworkflows/local/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
similarity index 74%
rename from subworkflows/local/annotate_cadd.nf
rename to subworkflows/local/annotation/annotate_cadd.nf
index 6bf1b533..d0d77cd2 100644
--- a/subworkflows/local/annotate_cadd.nf
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -2,12 +2,12 @@
 // A subworkflow to annotate snvs
 //
 
-include { BCFTOOLS_ANNOTATE             } from '../../modules/nf-core/bcftools/annotate/main'
-include { BCFTOOLS_VIEW                 } from '../../modules/nf-core/bcftools/view/main'
-include { CADD                          } from '../../modules/local/cadd'
-include { TABIX_TABIX as TABIX_ANNOTATE } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_CADD     } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_VIEW     } from '../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_ANNOTATE             } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_VIEW                 } from '../../../modules/nf-core/bcftools/view/main'
+include { CADD                          } from '../../../modules/local/cadd'
+include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_CADD     } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_VIEW     } from '../../../modules/nf-core/tabix/tabix/main'
 
 workflow ANNOTATE_CADD {
 

From ce263058d89cb6d73ea1cf071b83c3ce3731efcb Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 3 Apr 2023 16:23:25 +0200
Subject: [PATCH 05/15] update annotate options

---
 conf/modules/annotate_cadd.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index de1fd8ce..0bd5469d 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -34,7 +34,7 @@ process {
     }
 
     withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' {
-        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD" }
+        ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z" }
         ext.prefix = { "${input.simpleName}_rohann_vcfanno_filter_cadd" }
     }
 }

From 3357035378830cc35c1a31add8fabc3a07b842d2 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 3 Apr 2023 17:01:14 +0200
Subject: [PATCH 06/15] update config

---
 conf/modules/annotate_cadd.config | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index 0bd5469d..953930fd 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -16,24 +16,24 @@
 //
 
 process {
-    withName: '.*ANNOTATE_SNVS:ANNOTATE_CADD:BCFTOOLS_VIEW' {
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
         ext.args   = { "--output-type z --types indels" }
         ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_indels" }
     }
 
-    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:CADD' {
+    withName: '.*:ANNOTATE_CADD:CADD' {
         ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
     }
 
-    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:CADD' {
+    withName: '.*:ANNOTATE_CADD:CADD' {
         ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
     }
 
-    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:TABIX_CADD' {
+    withName: '.*:ANNOTATE_CADD:TABIX_CADD' {
         ext.args = { "--force --sequence 1 --begin 2 --end 2" }
     }
 
-    withName: '.*:ANNOTATE_SNVS:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' {
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' {
         ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z" }
         ext.prefix = { "${input.simpleName}_rohann_vcfanno_filter_cadd" }
     }

From cf00edb3835b645138c7972a3a65f6c88016b805 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 4 Apr 2023 00:50:11 +0200
Subject: [PATCH 07/15] add to mt subworkflow

---
 conf/modules/annotate_cadd.config             | 17 ++++----
 subworkflows/local/analyse_MT.nf              |  4 ++
 subworkflows/local/annotate_snvs.nf           | 18 ++++++++-
 .../local/mitochondria/merge_annotate_MT.nf   | 40 ++++++++++++++++---
 workflows/raredisease.nf                      |  2 +
 5 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index 953930fd..5c510214 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -12,21 +12,22 @@
 */
 
 //
-// CADD annotation         ext.when = { !(workflow.profile.contains("test")) }
+// CADD annotation
 //
 
 process {
-    withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
-        ext.args   = { "--output-type z --types indels" }
-        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_indels" }
+    withName: '.*:ANNOTATE_CADD.*' {
+        ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
     }
 
-    withName: '.*:ANNOTATE_CADD:CADD' {
-        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
+    withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
+        ext.args   = { "--output-type z --types indels" }
+        ext.prefix = { "${vcf.simpleName}_indels" }
     }
 
     withName: '.*:ANNOTATE_CADD:CADD' {
-        ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_cadd" }
+        ext.args   = { "-g ${params.genome}" }
+        ext.prefix = { "${vcf.simpleName}_cadd" }
     }
 
     withName: '.*:ANNOTATE_CADD:TABIX_CADD' {
@@ -35,6 +36,6 @@ process {
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' {
         ext.args   = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z" }
-        ext.prefix = { "${input.simpleName}_rohann_vcfanno_filter_cadd" }
+        ext.prefix = { "${input.simpleName}_ann" }
     }
 }
diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf
index 0ad82f1a..a4c2cd26 100644
--- a/subworkflows/local/analyse_MT.nf
+++ b/subworkflows/local/analyse_MT.nf
@@ -10,6 +10,8 @@ include { MERGE_ANNOTATE_MT                            } from './mitochondria/me
 workflow ANALYSE_MT {
     take:
         ch_bam                    // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
+        ch_cadd_header            // channel: [mandatory] [ path(txt) ]
+        ch_cadd_scores            // channel: [mandatory] [ path(annotation) ]
         ch_genome_bwa_index       // channel: [mandatory] [ path(index) ]
         ch_genome_bwamem2_index   // channel: [mandatory] [ path(index) ]
         ch_genome_fasta_meta      // channel: [mandatory] [ val(meta), path(fasta) ]
@@ -78,6 +80,8 @@ workflow ANALYSE_MT {
         MERGE_ANNOTATE_MT(
             ALIGN_AND_CALL_MT.out.vcf,
             PICARD_LIFTOVERVCF.out.vcf_lifted,
+            ch_cadd_header,
+            ch_cadd_scores,
             ch_genome_fasta_no_meta,
             ch_genome_dict_meta,
             ch_genome_dict_no_meta,
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index e3c4d55f..74da97de 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -80,6 +80,7 @@ workflow ANNOTATE_SNVS {
 
         GATK4_SELECTVARIANTS (ch_vcf_scatter_in)
 
+        // Annotating with CADD
         ANNOTATE_CADD (
             GATK4_SELECTVARIANTS.out.vcf,
             GATK4_SELECTVARIANTS.out.tbi,
@@ -87,8 +88,22 @@ workflow ANNOTATE_SNVS {
             ch_cadd_scores
         )
 
+        // Pick input for VEP
+        GATK4_SELECTVARIANTS.out.vcf
+            .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
+            .branch { it  ->
+                selvar: it[2].equals("null")
+                    return [it[0], it[1]]
+                cadd: !(it[2].equals("null"))
+                    return [it[2], it[3]]
+            }
+            .set { ch_for_mix }
+
+        ch_vep_in = ch_for_mix.selvar.mix(ch_for_mix.cadd)
+
+        // Annotating with ensembl Vep
         ENSEMBLVEP_SNV(
-            ANNOTATE_CADD.out.vcf,
+            ch_vep_in,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
@@ -128,6 +143,7 @@ workflow ANNOTATE_SNVS {
         ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions)
         ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions)
         ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions.first())
+        ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
         ch_versions = ch_versions.mix(ENSEMBLVEP_SNV.out.versions.first())
         ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first())
         ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf
index 33886f4e..d904f500 100644
--- a/subworkflows/local/mitochondria/merge_annotate_MT.nf
+++ b/subworkflows/local/mitochondria/merge_annotate_MT.nf
@@ -9,16 +9,20 @@ include { TABIX_TABIX as TABIX_TABIX_MT                         } from '../../..
 include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT                 } from '../../../modules/nf-core/bcftools/norm/main'
 include { TABIX_TABIX as TABIX_TABIX_MT2                        } from '../../../modules/nf-core/tabix/tabix/main'
 include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT                   } from '../../../modules/nf-core/bcftools/merge/main'
+include { TABIX_TABIX as TABIX_TABIX_MERGE                      } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_TABIX_MT3                        } from '../../../modules/nf-core/tabix/tabix/main'
 include { ENSEMBLVEP as ENSEMBLVEP_MT                           } from '../../../modules/local/ensemblvep/main'
 include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT         } from '../../../modules/nf-core/haplogrep2/classify/main'
 include { VCFANNO as VCFANNO_MT                                 } from '../../../modules/nf-core/vcfanno/main'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_VCFANNO                 } from '../../../modules/nf-core/tabix/bgziptabix/main'
+include { ANNOTATE_CADD                                         } from '../annotation/annotate_cadd'
 
 workflow MERGE_ANNOTATE_MT {
     take:
         ch_vcf1                // channel: [mandatory] [ val(meta), path(vcf) ]
         ch_vcf2                // channel: [mandatory] [ val(meta), path(vcf) ]
+        ch_cadd_header         // channel: [mandatory] [ path(txt) ]
+        ch_cadd_scores         // channel: [mandatory] [ path(annotation) ]
         ch_genome_fasta        // channel: [mandatory] [ path(fasta) ]
         ch_genome_dict_meta    // channel: [mandatory] [ val(meta), path(dict) ]
         ch_genome_dict_no_meta // channel: [mandatory] [ path(dict) ]
@@ -83,18 +87,43 @@ workflow MERGE_ANNOTATE_MT {
             [],
             ch_genome_fasta,
             ch_genome_fai)
-        ch_merged_vcf = BCFTOOLS_MERGE_MT.out.merged_variants
 
-        ch_in_vep = ch_merged_vcf.mix(ch_case_vcf.single)
+        BCFTOOLS_MERGE_MT.out.merged_variants
+            .mix(ch_case_vcf.single)
+            .set { ch_annotation_in }
+
+        TABIX_TABIX_MERGE(ch_annotation_in)
+
+        // Annotating with CADD
+        ANNOTATE_CADD (
+            ch_annotation_in,
+            TABIX_TABIX_MERGE.out.tbi,
+            ch_cadd_header,
+            ch_cadd_scores
+        )
+
+        // Pick input for vep
+        ch_annotation_in
+            .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
+            .branch { it  ->
+                merged: it[2].equals("null")
+                    return [it[0], it[1]]
+                cadd: !(it[2].equals("null"))
+                    return [it[2], it[3]]
+            }
+            .set { ch_for_mix }
+        ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)
 
         // Annotating with ensembl Vep
-        ENSEMBLVEP_MT( ch_in_vep,
+        ENSEMBLVEP_MT(
+            ch_vep_in,
             val_vep_genome,
             "homo_sapiens",
             val_vep_cache_version,
             ch_vep_cache,
             ch_genome_fasta,
-            [])
+            []
+        )
 
         // Running vcfanno
         TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz)
@@ -107,13 +136,14 @@ workflow MERGE_ANNOTATE_MT {
         ch_tbi_out = ZIP_TABIX_VCFANNO.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] }
 
         // Running haplogrep2
-        HAPLOGREP2_CLASSIFY_MT(ch_in_vep, "vcf.gz")
+        HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")
 
         ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
         ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
         ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
         ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
         ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
+        ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
         ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
         ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
         ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions)
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 755cad42..a3bdb7b0 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -405,6 +405,8 @@ workflow RAREDISEASE {
     if (!params.skip_mt_analysis) {
         ANALYSE_MT (
             ch_mapped.bam_bai,
+            ch_cadd_header,
+            ch_cadd_scores,
             ch_bwa_index,
             ch_bwamem2_index,
             ch_genome_fasta_meta,

From 988e3ea3bc2e7f4471e973f1cc7e7769d2ec04cc Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 4 Apr 2023 01:36:17 +0200
Subject: [PATCH 08/15] update when

---
 CITATIONS.md                      | 6 ++++++
 conf/modules/annotate_cadd.config | 2 +-
 nextflow.config                   | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index b4bc88d1..ec755b55 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -22,6 +22,12 @@
 
   > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE; 2019:314-324. doi:10.1109/IPDPS.2019.00041
 
+- [CADD<sup>1</sup>](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9)<sup>,</sup> [<sup>2</sup>](https://academic.oup.com/nar/article/47/D1/D886/5146191)
+
+  > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9
+
+  > Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016
+
 - [DeepVariant](https://www.nature.com/articles/nbt.4235)
 
   > Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235
diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index 5c510214..309e3408 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -17,7 +17,7 @@
 
 process {
     withName: '.*:ANNOTATE_CADD.*' {
-        ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
+        ext.when = { !(params.cadd_annotation == null) || !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
     }
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
diff --git a/nextflow.config b/nextflow.config
index 39ba2245..ab2e058d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -26,6 +26,7 @@ params {
     skip_sv_annotation         = false
     skip_mt_analysis           = false
     gens_switch                = false
+    cadd_annotation            = null
     platform                   = 'illumina'
 
     // Alignment

From 886c3b2b4b62eb1399808605b10632c6dd5a6ebd Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 4 Apr 2023 02:14:55 +0200
Subject: [PATCH 09/15] add cadd nf-core

---
 conf/modules/annotate_cadd.config             |  4 +-
 modules.json                                  |  5 ++
 .../{local/cadd.nf => nf-core/cadd/main.nf}   |  0
 modules/nf-core/cadd/meta.yml                 | 48 +++++++++++++++++++
 .../local/annotation/annotate_cadd.nf         |  2 +-
 5 files changed, 56 insertions(+), 3 deletions(-)
 rename modules/{local/cadd.nf => nf-core/cadd/main.nf} (100%)
 create mode 100644 modules/nf-core/cadd/meta.yml

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index 309e3408..e2962a50 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -17,8 +17,8 @@
 
 process {
     withName: '.*:ANNOTATE_CADD.*' {
-        ext.when = { !(params.cadd_annotation == null) || !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
-    }
+        ext.when = { (params.cadd_annotation != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) }
+   }
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
         ext.args   = { "--output-type z --types indels" }
diff --git a/modules.json b/modules.json
index 092d75a1..2ed3a685 100644
--- a/modules.json
+++ b/modules.json
@@ -60,6 +60,11 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
                     },
+                    "cadd": {
+                        "branch": "master",
+                        "git_sha": "2e2f8581f4d2ab4729c2b7bd5da8400b54fb8fdf",
+                        "installed_by": ["modules"]
+                    },
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
diff --git a/modules/local/cadd.nf b/modules/nf-core/cadd/main.nf
similarity index 100%
rename from modules/local/cadd.nf
rename to modules/nf-core/cadd/main.nf
diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml
new file mode 100644
index 00000000..0dd0fb26
--- /dev/null
+++ b/modules/nf-core/cadd/meta.yml
@@ -0,0 +1,48 @@
+name: "cadd"
+description: CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome.
+keywords:
+  - cadd
+  - annotate
+tools:
+  - "cadd":
+      description: "CADD scripts release for offline scoring"
+      homepage: "https://cadd.gs.washington.edu/"
+      documentation: "https://github.com/kircherlab/CADD-scripts/blob/master/README.md"
+      tool_dev_url: "https://github.com/kircherlab/CADD-scripts/"
+      doi: "10.1093/nar/gky1016"
+      licence: "['Restricted. Free for non-commercial users.']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: Input file for annotation in vcf or vcf.gz format
+      pattern: "*.{vcf,vcf.gz}"
+  - annotation_dir:
+      type: file
+      description: |
+        Path to folder containing the vcf files with precomputed CADD scores.
+        This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
+      pattern: "*.{vcf,vcf.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - tsv:
+      type: file
+      description: Annotated tsv file
+      pattern: "*.{tsv,tsv.gz}"
+
+authors:
+  - "@ramprasadn"
diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
index d0d77cd2..0a00362e 100644
--- a/subworkflows/local/annotation/annotate_cadd.nf
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -4,7 +4,7 @@
 
 include { BCFTOOLS_ANNOTATE             } from '../../../modules/nf-core/bcftools/annotate/main'
 include { BCFTOOLS_VIEW                 } from '../../../modules/nf-core/bcftools/view/main'
-include { CADD                          } from '../../../modules/local/cadd'
+include { CADD                          } from '../../../modules/nf-core/cadd/main'
 include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_CADD     } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_VIEW     } from '../../../modules/nf-core/tabix/tabix/main'

From df3986d9810d422f137d453d7e96e6b54c749731 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 4 Apr 2023 02:24:19 +0200
Subject: [PATCH 10/15] fix editorlint err

---
 conf/modules/annotate_cadd.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index e2962a50..a5be41d0 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -18,7 +18,7 @@
 process {
     withName: '.*:ANNOTATE_CADD.*' {
         ext.when = { (params.cadd_annotation != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) }
-   }
+    }
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
         ext.args   = { "--output-type z --types indels" }

From 92cabc7bbca6124eee909c9734e4d85b091725e9 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 4 Apr 2023 22:50:22 +0200
Subject: [PATCH 11/15] update usage docs

---
 docs/usage.md                                  | 4 ++++
 subworkflows/local/annotation/annotate_cadd.nf | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 65e13464..07f598ca 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -196,6 +196,7 @@ The mandatory and optional parameters for each category are tabulated below.
 | vcfanno_toml<sup>3</sup>      | vcfanno_lua                    |
 | vep_cache_version             | vep_filters<sup>6</sup>        |
 | vep_cache                     | score_config_snv<sup>7</sup>   |
+|                               | cadd_annotation<sup>8</sup>    |
 
 <sup>1</sup>Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.<br />
 <sup>2</sup>Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).<br />
@@ -204,6 +205,9 @@ The mandatory and optional parameters for each category are tabulated below.
 <sup>5</sup>Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).<br />
 <sup>6</sup> This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into canditate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt).<br />
 <sup>7</sup>Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini).<br />
+<sup>8</sup>Path to a folder containing CADD annotations. It is the equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and is used to calculate CADD scores for small indels.<br />
+
+> NB: We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (description: "All possible SNVs of GRCh37/hg19" or "All possible SNVs of GRCh38/hg38").
 
 ##### 7. SV annotation & Ranking
 
diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
index 0a00362e..f75fb18d 100644
--- a/subworkflows/local/annotation/annotate_cadd.nf
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -28,8 +28,8 @@ workflow ANNOTATE_CADD {
 
         TABIX_CADD(CADD.out.tsv)
 
-        BCFTOOLS_VIEW.out.vcf
-            .join(TABIX_VIEW.out.tbi)
+        ch_vcf
+            .join(ch_index)
             .join(CADD.out.tsv)
             .join(TABIX_CADD.out.tbi)
             .groupTuple()

From 0df747cffebb63afa672ca1a8f113a7ae5d365a5 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 5 Apr 2023 00:57:00 +0200
Subject: [PATCH 12/15] update output

---
 docs/output.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index bafda861..39f03012 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -40,6 +40,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [Annotation - SNV](#annotation---snv)
   - [bcftools roh](#bcftools-roh)
   - [vcfanno](#vcfanno)
+  - [CADD](#cadd)
   - [VEP](#vep)
 - [Annotation - SV](#annotation---sv)
   - [SVDB query](#svdb-query)
@@ -296,7 +297,13 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
 
 #### vcfanno
 
-[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder by default, and is passed to vep for further annotation.
+[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder by default, and are passed to CADD and/or VEP for further annotation.
+
+We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files can be downloaded from [here](https://cadd.gs.washington.edu/download)).
+
+#### CADD
+
+[CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletion variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels the scores are calculated on the fly by CADD. The output files are not published in the output folder by default, and are passed to VEP for further annotation.
 
 #### VEP
 

From 4ea822e1efe742d17a74208e4e3d396e227377ce Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 5 Apr 2023 01:06:29 +0200
Subject: [PATCH 13/15] update readme and output

---
 README.md      | 2 ++
 docs/output.md | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/README.md b/README.md
index a55e0d37..ebe7aea5 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 - [bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh)
 - [vcfanno](https://github.com/brentp/vcfanno)
+- [CADD](https://cadd.gs.washington.edu/)
 - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)
 
 **6. Annotation - SV:**
@@ -72,6 +73,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 - Annotation:
   - [HaploGrep2](https://github.com/seppinho/haplogrep-cmd)
   - [vcfanno](https://github.com/brentp/vcfanno)
+  - [CADD](https://cadd.gs.washington.edu/)
   - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)
 
 **8. Variant calling - repeat expansions:**
diff --git a/docs/output.md b/docs/output.md
index 39f03012..5c890d9c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -378,6 +378,12 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen
 
 [vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder by default, and is passed to vep for further annotation.
 
+We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files can be downloaded from [here](https://cadd.gs.washington.edu/download)).
+
+##### CADD
+
+[CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletion variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels the scores are calculated on the fly by CADD. The output files are not published in the output folder by default, and are passed to VEP for further annotation.
+
 ##### VEP
 
 [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) determines the effect of your variants on genes, transcripts, and protein sequence, as well as regulatory regions.

From 3926a7c0b30cdee7a1dedc918fda233cc093f20d Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 25 Apr 2023 11:19:05 +0200
Subject: [PATCH 14/15] review suggestions

---
 assets/cadd_to_vcf_header_-1.0-.txt                  |  2 +-
 subworkflows/local/analyse_MT.nf                     |  4 ++--
 subworkflows/local/annotate_snvs.nf                  | 10 +++++-----
 subworkflows/local/annotation/annotate_cadd.nf       | 10 +++++-----
 subworkflows/local/mitochondria/merge_annotate_MT.nf |  4 ++--
 workflows/raredisease.nf                             |  6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt
index 5d244377..8deee482 100644
--- a/assets/cadd_to_vcf_header_-1.0-.txt
+++ b/assets/cadd_to_vcf_header_-1.0-.txt
@@ -1 +1 @@
-##INFO=<ID=CADD,Number=1,Type=Float,Description="The CADD relative score for this alternative.">
+##INFO=<ID=CADD,Number=1,Type=Float,Description="PHRED-like scaled CADD score.">
diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf
index a4c2cd26..c46b82c9 100644
--- a/subworkflows/local/analyse_MT.nf
+++ b/subworkflows/local/analyse_MT.nf
@@ -11,7 +11,7 @@ workflow ANALYSE_MT {
     take:
         ch_bam                    // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
         ch_cadd_header            // channel: [mandatory] [ path(txt) ]
-        ch_cadd_scores            // channel: [mandatory] [ path(annotation) ]
+        ch_cadd_resources         // channel: [mandatory] [ path(annotation) ]
         ch_genome_bwa_index       // channel: [mandatory] [ path(index) ]
         ch_genome_bwamem2_index   // channel: [mandatory] [ path(index) ]
         ch_genome_fasta_meta      // channel: [mandatory] [ val(meta), path(fasta) ]
@@ -81,7 +81,7 @@ workflow ANALYSE_MT {
             ALIGN_AND_CALL_MT.out.vcf,
             PICARD_LIFTOVERVCF.out.vcf_lifted,
             ch_cadd_header,
-            ch_cadd_scores,
+            ch_cadd_resources,
             ch_genome_fasta_no_meta,
             ch_genome_dict_meta,
             ch_genome_dict_no_meta,
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index 74da97de..be1392d7 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -22,7 +22,7 @@ workflow ANNOTATE_SNVS {
         ch_vcf                // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
         analysis_type         // string: [mandatory] 'wgs' or 'wes'
         ch_cadd_header        // channel: [mandatory] [ path(txt) ]
-        ch_cadd_scores        // channel: [mandatory] [ path(annotation) ]
+        ch_cadd_resources     // channel: [mandatory] [ path(annotation) ]
         ch_vcfanno_resources  // channel: [mandatory] [ path(resources) ]
         ch_vcfanno_lua        // channel: [mandatory] [ path(lua) ]
         ch_vcfanno_toml       // channel: [mandatory] [ path(toml) ]
@@ -85,13 +85,13 @@ workflow ANNOTATE_SNVS {
             GATK4_SELECTVARIANTS.out.vcf,
             GATK4_SELECTVARIANTS.out.tbi,
             ch_cadd_header,
-            ch_cadd_scores
+            ch_cadd_resources
         )
 
-        // Pick input for VEP
+        // If CADD is run, pick CADD output as input for VEP else pass selectvariants output to VEP.
         GATK4_SELECTVARIANTS.out.vcf
-            .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
-            .branch { it  ->
+            .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) // If CADD is not run then this channel will be empty, so assign a default value to allow filtering with branch operator
+            .branch { it  ->                                // If CADD is run, then "it" will be [[meta],selvar.vcf,[meta],cadd.vcf], else [[meta],selvar.vcf,null]
                 selvar: it[2].equals("null")
                     return [it[0], it[1]]
                 cadd: !(it[2].equals("null"))
diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
index f75fb18d..9e49c01f 100644
--- a/subworkflows/local/annotation/annotate_cadd.nf
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -12,10 +12,10 @@ include { TABIX_TABIX as TABIX_VIEW     } from '../../../modules/nf-core/tabix/t
 workflow ANNOTATE_CADD {
 
     take:
-        ch_vcf         // channel: [mandatory] [ val(meta), path(vcfs) ]
-        ch_index       // channel: [mandatory] [ val(meta), path(tbis) ]
-        ch_header      // channel: [mandatory] [ path(txt) ]
-        ch_cadd_scores // channel: [mandatory] [ path(dir) ]
+        ch_vcf            // channel: [mandatory] [ val(meta), path(vcfs) ]
+        ch_index          // channel: [mandatory] [ val(meta), path(tbis) ]
+        ch_header         // channel: [mandatory] [ path(txt) ]
+        ch_cadd_resources // channel: [mandatory] [ path(dir) ]
 
     main:
         ch_versions       = Channel.empty()
@@ -24,7 +24,7 @@ workflow ANNOTATE_CADD {
 
         TABIX_VIEW(BCFTOOLS_VIEW.out.vcf)
 
-        CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_scores)
+        CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources)
 
         TABIX_CADD(CADD.out.tsv)
 
diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf
index d904f500..2e17f71d 100644
--- a/subworkflows/local/mitochondria/merge_annotate_MT.nf
+++ b/subworkflows/local/mitochondria/merge_annotate_MT.nf
@@ -22,7 +22,7 @@ workflow MERGE_ANNOTATE_MT {
         ch_vcf1                // channel: [mandatory] [ val(meta), path(vcf) ]
         ch_vcf2                // channel: [mandatory] [ val(meta), path(vcf) ]
         ch_cadd_header         // channel: [mandatory] [ path(txt) ]
-        ch_cadd_scores         // channel: [mandatory] [ path(annotation) ]
+        ch_cadd_resources      // channel: [mandatory] [ path(dir) ]
         ch_genome_fasta        // channel: [mandatory] [ path(fasta) ]
         ch_genome_dict_meta    // channel: [mandatory] [ val(meta), path(dict) ]
         ch_genome_dict_no_meta // channel: [mandatory] [ path(dict) ]
@@ -99,7 +99,7 @@ workflow MERGE_ANNOTATE_MT {
             ch_annotation_in,
             TABIX_TABIX_MERGE.out.tbi,
             ch_cadd_header,
-            ch_cadd_scores
+            ch_cadd_resources
         )
 
         // Pick input for vep
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index a3bdb7b0..20f349e1 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -142,7 +142,7 @@ workflow RAREDISEASE {
 
     // Initialize all file channels including unprocessed vcf, bed and tab files
     ch_cadd_header                    = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
-    ch_cadd_scores                    = params.cadd_annotation                ? Channel.fromPath(params.cadd_annotation).collect()
+    ch_cadd_resources                 = params.cadd_annotation                ? Channel.fromPath(params.cadd_annotation).collect()
                                                                               : Channel.value([])
     ch_call_interval                  = params.call_interval                  ? Channel.fromPath(params.call_interval).collect()
                                                                               : Channel.value([])
@@ -406,7 +406,7 @@ workflow RAREDISEASE {
         ANALYSE_MT (
             ch_mapped.bam_bai,
             ch_cadd_header,
-            ch_cadd_scores,
+            ch_cadd_resources,
             ch_bwa_index,
             ch_bwamem2_index,
             ch_genome_fasta_meta,
@@ -455,7 +455,7 @@ workflow RAREDISEASE {
             ch_vcf,
             params.analysis_type,
             ch_cadd_header,
-            ch_cadd_scores,
+            ch_cadd_resources,
             ch_vcfanno_resources,
             ch_vcfanno_lua,
             ch_vcfanno_toml,

From b702fd2bcc549d0997b46d499bfe60b2aa792739 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 25 Apr 2023 11:22:51 +0200
Subject: [PATCH 15/15] change params name

---
 conf/modules/annotate_cadd.config | 2 +-
 docs/usage.md                     | 2 +-
 main.nf                           | 2 +-
 nextflow.config                   | 2 +-
 nextflow_schema.json              | 2 +-
 workflows/raredisease.nf          | 4 ++--
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config
index a5be41d0..2535f7f9 100644
--- a/conf/modules/annotate_cadd.config
+++ b/conf/modules/annotate_cadd.config
@@ -17,7 +17,7 @@
 
 process {
     withName: '.*:ANNOTATE_CADD.*' {
-        ext.when = { (params.cadd_annotation != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) }
+        ext.when = { (params.cadd_resources != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) }
     }
 
     withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' {
diff --git a/docs/usage.md b/docs/usage.md
index 07f598ca..79765757 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -196,7 +196,7 @@ The mandatory and optional parameters for each category are tabulated below.
 | vcfanno_toml<sup>3</sup>      | vcfanno_lua                    |
 | vep_cache_version             | vep_filters<sup>6</sup>        |
 | vep_cache                     | score_config_snv<sup>7</sup>   |
-|                               | cadd_annotation<sup>8</sup>    |
+|                               | cadd_resources<sup>8</sup>     |
 
 <sup>1</sup>Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.<br />
 <sup>2</sup>Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).<br />
diff --git a/main.nf b/main.nf
index fc1c6ca4..fad9912e 100644
--- a/main.nf
+++ b/main.nf
@@ -23,7 +23,7 @@ params.fasta_fai                      = WorkflowMain.getGenomeAttribute(params,
 params.bwa                            = WorkflowMain.getGenomeAttribute(params, 'bwa')
 params.bwamem2                        = WorkflowMain.getGenomeAttribute(params, 'bwamem2')
 params.call_interval                  = WorkflowMain.getGenomeAttribute(params, 'call_interval')
-params.cadd_annotation                = WorkflowMain.getGenomeAttribute(params, 'cadd_annotation')
+params.cadd_resources                 = WorkflowMain.getGenomeAttribute(params, 'cadd_resources')
 params.gnomad_af                      = WorkflowMain.getGenomeAttribute(params, 'gnomad_af')
 params.gnomad_af_idx                  = WorkflowMain.getGenomeAttribute(params, 'gnomad_af_idx')
 params.intervals_wgs                  = WorkflowMain.getGenomeAttribute(params, 'intervals_wgs')
diff --git a/nextflow.config b/nextflow.config
index 329a1e1d..6cfcaa40 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -26,7 +26,7 @@ params {
     skip_sv_annotation         = false
     skip_mt_analysis           = false
     gens_switch                = false
-    cadd_annotation            = null
+    cadd_resources             = null
     platform                   = 'illumina'
 
     // Alignment
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4adcefe2..b25b5d73 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -103,7 +103,7 @@
                     "fa_icon": "fas fa-folder-open",
                     "hidden": true
                 },
-                "cadd_annotation": {
+                "cadd_resources": {
                     "type": "string",
                     "format": "directory-path",
                     "fa_icon": "fas fa-file",
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 20f349e1..55bc0bd0 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -14,7 +14,7 @@ def checkPathParamList = [
     params.bwa,
     params.bwamem2,
     params.call_interval,
-    params.cadd_annotation,
+    params.cadd_resources,
     params.fasta,
     params.fasta_fai,
     params.gens_gnomad_pos,
@@ -142,7 +142,7 @@ workflow RAREDISEASE {
 
     // Initialize all file channels including unprocessed vcf, bed and tab files
     ch_cadd_header                    = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
-    ch_cadd_resources                 = params.cadd_annotation                ? Channel.fromPath(params.cadd_annotation).collect()
+    ch_cadd_resources                 = params.cadd_resources                 ? Channel.fromPath(params.cadd_resources).collect()
                                                                               : Channel.value([])
     ch_call_interval                  = params.call_interval                  ? Channel.fromPath(params.call_interval).collect()
                                                                               : Channel.value([])