Add parameters to skip snv calling and repeat analysis by ramprasadn · Pull Request #571 · nf-core/raredisease · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Add parameters to skip snv calling and repeat analysis #571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip SNV calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571)
- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570)

### `Changed`
Expand All @@ -24,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| ------------- | --------------------- |
| | mbuffer_mem |
| | samtools_sort_threads |
| | skip_repeat_analysis |
| | skip_snv_calling |

## 2.1.0 - Obelix [2024-05-29]

Expand Down
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ params {
skip_me_annotation = false
skip_mt_annotation = false
skip_qualimap = false
skip_repeat_analysis = false
skip_snv_annotation = false
skip_snv_calling = false
skip_sv_annotation = false
skip_me_annotation = false
skip_mt_annotation = false
skip_mt_subsample = false
skip_vcf2cytosure = true
skip_vep_filter = false
Expand Down
10 changes: 10 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -568,11 +568,21 @@
"description": "Specifies whether or not to subsample mt alignment.",
"fa_icon": "fas fa-toggle-on"
},
"skip_repeat_analysis": {
"type": "boolean",
"description": "Specifies whether or not to skip calling and annotation of repeat expansions.",
"fa_icon": "fas fa-toggle-on"
},
"skip_snv_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip the annotate SNV subworkflow.",
"fa_icon": "fas fa-toggle-on"
},
"skip_snv_calling": {
"type": "boolean",
"description": "Specifies whether or not to skip nuclear and mitochondrial SNV calling and annotation.",
"fa_icon": "fas fa-toggle-on"
},
"skip_sv_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip the annotate structural variant subworkflow.",
Expand Down
288 changes: 156 additions & 132 deletions workflows/raredisease.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def mandatoryParams = [
"intervals_wgs",
"intervals_y",
"platform",
"variant_catalog",
"variant_caller"
]
def missingParamsCount = 0
Expand All @@ -32,6 +31,14 @@ if (params.run_rtgvcfeval) {
mandatoryParams += ["rtg_truthvcfs"]
}

if (!params.skip_repeat_analysis) {
mandatoryParams += ["variant_catalog"]
}

if (!params.skip_snv_calling) {
mandatoryParams += ["genome"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

genome is also used by smncopynumbercaller, but maybe you're adding that in a later PR :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup! I have a couple more PRs lined up to be merged after this :D

}

if (!params.skip_snv_annotation) {
mandatoryParams += ["genome", "vcfanno_resources", "vcfanno_toml", "vep_cache", "vep_cache_version",
"gnomad_af", "score_config_snv", "variant_consequences_snv"]
Expand Down Expand Up @@ -355,9 +362,12 @@ workflow RAREDISEASE {

ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty()

//
// ALIGNING READS, FETCH STATS, AND MERGE.
//
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ALIGN & FETCH STATS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

ALIGN (
ch_samplesheet,
ch_genome_fasta,
Expand Down Expand Up @@ -406,10 +416,13 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(QC_BAM.out.versions)

//
// EXPANSIONHUNTER AND STRANGER
//
if (params.analysis_type.equals("wgs")) {
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CALL AND ANNOTATE REPEAT EXPANSIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

if (!params.skip_repeat_analysis && params.analysis_type.equals("wgs") ) {
CALL_REPEAT_EXPANSIONS (
ch_mapped.genome_bam_bai,
ch_variant_catalog,
Expand All @@ -420,49 +433,132 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
}

//
// SNV CALLING
//
CALL_SNV (
ch_mapped.genome_bam_bai,
ch_mapped.mt_bam_bai,
ch_mapped.mtshift_bam_bai,
ch_genome_chrsizes,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dictionary,
ch_mt_intervals,
ch_mtshift_fasta,
ch_mtshift_fai,
ch_mtshift_dictionary,
ch_mtshift_intervals,
ch_mtshift_backchain,
ch_dbsnp,
ch_dbsnp_tbi,
ch_call_interval,
ch_ml_model,
ch_case_info,
ch_foundin_header,
Channel.value(params.sentieon_dnascope_pcr_indel_model)
)
ch_versions = ch_versions.mix(CALL_SNV.out.versions)

//
// VARIANT EVALUATION
//
if (params.run_rtgvcfeval) {
VARIANT_EVALUATION (
CALL_SNV.out.genome_vcf_tabix,
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SNVs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

if (!params.skip_snv_calling) {
CALL_SNV (
ch_mapped.genome_bam_bai,
ch_mapped.mt_bam_bai,
ch_mapped.mtshift_bam_bai,
ch_genome_chrsizes,
ch_genome_fasta,
ch_genome_fai,
ch_rtg_truthvcfs,
ch_sdf
ch_genome_dictionary,
ch_mt_intervals,
ch_mtshift_fasta,
ch_mtshift_fai,
ch_mtshift_dictionary,
ch_mtshift_intervals,
ch_mtshift_backchain,
ch_dbsnp,
ch_dbsnp_tbi,
ch_call_interval,
ch_ml_model,
ch_case_info,
ch_foundin_header,
Channel.value(params.sentieon_dnascope_pcr_indel_model)
)
ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions)
ch_versions = ch_versions.mix(CALL_SNV.out.versions)

//
// ANNOTATE GENOME SNVs
//
if (!params.skip_snv_annotation) {

ANNOTATE_GENOME_SNVS (
CALL_SNV.out.genome_vcf_tabix,
params.analysis_type,
ch_cadd_header,
ch_cadd_resources,
ch_vcfanno_resources,
ch_vcfanno_lua,
ch_vcfanno_toml,
params.genome,
params.vep_cache_version,
ch_vep_cache,
ch_genome_fasta,
ch_gnomad_af,
ch_samples,
ch_scatter_split_intervals,
ch_vep_extra_files,
ch_genome_chrsizes
).set { ch_snv_annotate }
ch_versions = ch_versions.mix(ch_snv_annotate.versions)

GENERATE_CLINICAL_SET_SNV(
ch_snv_annotate.vcf_ann,
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)

ANN_CSQ_PLI_SNV (
GENERATE_CLINICAL_SET_SNV.out.vcf,
ch_variant_consequences_snv
)
ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)

RANK_VARIANTS_SNV (
ANN_CSQ_PLI_SNV.out.vcf_ann,
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_snv
)
ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)
}

//
// ANNOTATE MT SNVs
//
if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) {

ANNOTATE_MT_SNVS (
CALL_SNV.out.mt_vcf,
CALL_SNV.out.mt_tabix,
ch_cadd_header,
ch_cadd_resources,
ch_genome_fasta,
ch_vcfanno_resources,
ch_vcfanno_toml,
params.genome,
params.vep_cache_version,
ch_vep_cache,
ch_vep_extra_files
).set { ch_mt_annotate }
ch_versions = ch_versions.mix(ch_mt_annotate.versions)

GENERATE_CLINICAL_SET_MT(
ch_mt_annotate.vcf_ann,
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)

ANN_CSQ_PLI_MT(
GENERATE_CLINICAL_SET_MT.out.vcf,
ch_variant_consequences_snv
)
ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions)

RANK_VARIANTS_MT (
ANN_CSQ_PLI_MT.out.vcf_ann,
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_mt
)
ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
}
}

//
// SV CALLING
//
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SVs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

CALL_STRUCTURAL_VARIANTS (
ch_mapped.genome_marked_bam,
ch_mapped.genome_marked_bai,
Expand Down Expand Up @@ -522,94 +618,8 @@ workflow RAREDISEASE {

}

//
// ANNOTATE GENOME SNVs
//
if (!params.skip_snv_annotation) {

ANNOTATE_GENOME_SNVS (
CALL_SNV.out.genome_vcf_tabix,
params.analysis_type,
ch_cadd_header,
ch_cadd_resources,
ch_vcfanno_resources,
ch_vcfanno_lua,
ch_vcfanno_toml,
params.genome,
params.vep_cache_version,
ch_vep_cache,
ch_genome_fasta,
ch_gnomad_af,
ch_samples,
ch_scatter_split_intervals,
ch_vep_extra_files,
ch_genome_chrsizes
).set { ch_snv_annotate }
ch_versions = ch_versions.mix(ch_snv_annotate.versions)

GENERATE_CLINICAL_SET_SNV(
ch_snv_annotate.vcf_ann,
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)

ANN_CSQ_PLI_SNV (
GENERATE_CLINICAL_SET_SNV.out.vcf,
ch_variant_consequences_snv
)
ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)

RANK_VARIANTS_SNV (
ANN_CSQ_PLI_SNV.out.vcf_ann,
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_snv
)
ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)

}

//
// ANNOTATE MT SNVs
//
if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) {

ANNOTATE_MT_SNVS (
CALL_SNV.out.mt_vcf,
CALL_SNV.out.mt_tabix,
ch_cadd_header,
ch_cadd_resources,
ch_genome_fasta,
ch_vcfanno_resources,
ch_vcfanno_toml,
params.genome,
params.vep_cache_version,
ch_vep_cache,
ch_vep_extra_files
).set { ch_mt_annotate }
ch_versions = ch_versions.mix(ch_mt_annotate.versions)

GENERATE_CLINICAL_SET_MT(
ch_mt_annotate.vcf_ann,
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)

ANN_CSQ_PLI_MT(
GENERATE_CLINICAL_SET_MT.out.vcf,
ch_variant_consequences_snv
)
ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions)

RANK_VARIANTS_MT (
ANN_CSQ_PLI_MT.out.vcf_ann,
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_mt
)
ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)

}

// STEP 1.7: SMNCOPYNUMBERCALLER
RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output
Expand Down Expand Up @@ -710,6 +720,20 @@ workflow RAREDISEASE {

}
}

//
// VARIANT EVALUATION
//
// VARIANT_EVALUATION consumes CALL_SNV.out.genome_vcf_tabix, and CALL_SNV is only
// invoked when params.skip_snv_calling is false. Guard on both parameters so that
// CALL_SNV.out is never accessed when the SNV calling subworkflow was not run.
if (params.run_rtgvcfeval && !params.skip_snv_calling) {
    VARIANT_EVALUATION (
        CALL_SNV.out.genome_vcf_tabix,
        ch_genome_fai,
        ch_rtg_truthvcfs,
        ch_sdf
    )
    ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions)
}

//
// Collate and save software versions
//
Expand Down
0