Blc supplementaryanalysis by bcantarel · Pull Request #30 · formbio/laava · GitHub
[go: up one dir, main page]
Skip to content

Blc supplementaryanalysis #30

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
45 changes: 45 additions & 0 deletions bin/bamqc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#bamqc.sh -- run QC on a BAM file: sort if needed, drop secondary and
#supplementary alignments, then run fastqc, samtools
#flagstat/idxstats/stats/coverage, and qualimap bamqc.

usage() {
    echo "-h Help documentation for bamqc.sh"
    echo "-r --Reference Genome: GRCh38 or GRCm38"
    echo "-b --BAM File"
    echo "-p --Prefix for output file name"
    echo "-t --threads (number of CPUs)"
    echo "Example: bash bamqc.sh -p prefix -r ref.tar.gz -b SRR1551047.bam"
    exit 1
}
OPTIND=1 # Reset OPTIND
# NOTE: -r and -c are accepted for interface compatibility but are not
# currently used by any command below.
while getopts :r:b:c:t:p:h opt
do
    case $opt in
        r) refgeno=$OPTARG;;
        b) sbam=$OPTARG;;
        c) capture=$OPTARG;;
        t) threads=$OPTARG;;
        p) prefix=$OPTARG;;
        h) usage;;
        *) usage;;
    esac
done
shift $(($OPTIND -1))

# A BAM file and an output prefix are required.
if [[ -z $sbam ]] || [[ -z $prefix ]]
then
    usage
fi

# Default to all available CPUs; overridable with -t.
if [[ -z $threads ]]
then
    threads=$(nproc)
fi

# Sort the BAM first unless samtools already reports it as sorted.
issorted=$(samtools stats -@ ${threads} ${sbam} | grep "is sorted: 1")
if [[ -z $issorted ]]
then
    samtools sort -T ./ -@ ${threads} -o ${prefix}.sort.bam ${sbam}
    sbam="${prefix}.sort.bam"
fi

# Drop secondary (256) and supplementary (2048) alignments (-F 2304) so each
# read is counted once, then index for the region-based tools below.
samtools view -@ ${threads} -F 2304 -1 -o ${prefix}.filt.bam ${sbam}
sbam="${prefix}.filt.bam"
samtools index -@ ${threads} ${sbam}

fastqc -f bam ${sbam}
samtools flagstat -@ $threads ${sbam} > ${prefix}.flagstat.txt
samtools idxstats -@ $threads ${sbam} > ${prefix}.idxstat.txt
samtools stats -@ $threads ${sbam} > ${prefix}.stat.txt
samtools coverage -o ${prefix}.coverage.txt ${sbam}

qualimap bamqc -bam ${sbam} --java-mem-size=18G -outdir ${prefix}_qualimap > ${prefix}_bamqc_log.txt
35 changes: 35 additions & 0 deletions bin/hostgenect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
#hostgenect.sh -- count host-genome reads per gene: merge an exon BED,
#run bedtools coverage against the BAM, then collapse to per-gene counts
#with merge_bedcov.pl.

usage() {
    echo "-h Help documentation for hostgenect.sh"
    echo "-p --Prefix for output file name"
    echo "-e --Exon BED file"
    echo "-t --threads (number of CPUs)"
    echo "Example: bash hostgenect.sh -p prefix -e exons.bed SRR1551047.bam"
    exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :p:e:t:h opt
do
    case $opt in
        p) prefix=$OPTARG;;
        e) exonbed=$OPTARG;;
        t) threads=$OPTARG;;
        h) usage;;
        *) usage;;
    esac
done

shift $(($OPTIND -1))

# Capture the positional BAM argument(s) BEFORE deriving the default prefix
# from them -- the original script read $sbam here while it was still unset.
sbam=$@

if [[ -z $sbam ]] || [[ -z $exonbed ]]
then
    usage
fi
if [[ -z $threads ]]
then
    threads=$(nproc)
fi
if [[ -z $prefix ]]
then
    prefix=$(echo $sbam | cut -d. -f1)
fi

# Collapse overlapping exon records, keeping the distinct name annotations
# (column 4) so merge_bedcov.pl can map intervals back to genes.
bedtools merge -i ${exonbed} -o distinct -c 4 > exons.merged.bed
bedtools coverage -b ${sbam} -a exons.merged.bed > ${prefix}.bedout.txt
merge_bedcov.pl $prefix ${prefix}.bedout.txt
1 change: 1 addition & 0 deletions bin/map_reads.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ echo
grep '^>' all_refs.fa
echo
threads=$(nproc)
if [[ $reads == *.bam ]]; then
echo "Converting $reads from BAM to FASTQ"
samtools fastq -n -0 reads.fq "$reads"
Expand Down
30 changes: 30 additions & 0 deletions bin/merge_bedcov.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/perl
# merge_bedcov.pl -- collapse bedtools-coverage rows over merged exon
# intervals into per-gene read counts.
#
# Usage: merge_bedcov.pl <prefix> <bedtools_coverage_file>
#
# Each input row is tab-separated: chrom, start, end, name, numreads,
# numbases, length, fraction_covered. The "name" column is a comma-separated
# list of pipe-delimited annotations (gene|ensembl|transcript|exon|strand),
# as produced by "bedtools merge -c 4 -o distinct" in hostgenect.sh.
# Output (<prefix>.bedtools.cov.txt): ensembl id, gene symbol, summed reads.
use strict;
use warnings;

my $prefix = shift @ARGV;
my $ctfile = shift @ARGV;
die "Usage: merge_bedcov.pl <prefix> <bedtools_coverage_file>\n"
    unless defined $prefix and defined $ctfile;

my %genect;
open my $bed, '<', $ctfile or die "open $ctfile: $!";
while ( my $line = <$bed> ) {
    chomp $line;
    my ( $chr, $start, $end, $name, $numreads ) = split /\t/, $line;

    # One merged interval can list the same gene via several exon/transcript
    # annotations; de-duplicate by Ensembl ID so the interval's reads are
    # added only once per gene.
    my %uni;
    for my $exon ( split /,/, $name ) {
        my ( $gene, $ensembl ) = split /\|/, $exon;
        next unless $ensembl;
        $uni{$ensembl} = $gene;
    }
    for my $ens ( keys %uni ) {
        $genect{ join( '|', $ens, $uni{$ens} ) } += $numreads;
    }
}
close $bed;

open my $out, '>', "$prefix.bedtools.cov.txt" or die "open $prefix.bedtools.cov.txt: $!";
for my $gene ( keys %genect ) {
    my ( $ens, $sym ) = split /\|/, $gene;
    print {$out} join( "\t", $ens, $sym, $genect{$gene} ), "\n";
}
close $out or die "close $prefix.bedtools.cov.txt: $!";
29 changes: 29 additions & 0 deletions laavasupp.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Supplementary-analysis image for LAAVA: samtools/bedtools/fastqc from apt,
# Qualimap from Bitbucket, and R with a set of CRAN/Bioconductor packages.
FROM ubuntu:22.04

ENV QUALIMAP_VER="2.3"
# NOTE(review): FASTQC_VER is declared but never used below -- fastqc is
# installed from apt; confirm whether a pinned FastQC install was intended.
ENV FASTQC_VER="0.12.1"
#Build Qualimap Environment variable

# Base toolchain plus the bioinformatics CLI tools used by bamqc.sh and
# hostgenect.sh (samtools, bedtools, fastqc) and a JRE/JDK for Qualimap.
RUN apt-get update; apt-get install -y autoconf automake build-essential gcc checkinstall; apt-get upgrade; \
DEBIAN_FRONTEND="noninteractive" apt-get install -y --allow-unauthenticated libxml2-dev libssl-dev libcurl4-gnutls-dev wget unzip git default-jre default-jdk r-base pigz parallel bzip2 curl gcc libc6-dev make zlib1g samtools bedtools fastqc && apt-get clean && rm -rf /var/lib/apt/lists/*

# Trim packages not needed at runtime to shrink the image.
RUN apt-get update; apt-get remove -y ghostscript poppler-data fonts-urw-base35 libjbig2dec0 libpulse0

RUN apt update -qq; DEBIAN_FRONTEND="noninteractive" apt install -y --allow-unauthenticated software-properties-common dirmngr

# NOTE(review): the base image is ubuntu:22.04 (jammy) but this adds the
# *focal* CRAN repository -- confirm this is intentional (jammy-cran40
# exists). Also, apt-key is deprecated on 22.04; consider a keyring file.
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu/focal-cran40/"; apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9; add-apt-repository ppa:openjdk-r/ppa

# NOTE(review): r-base was already installed in the first apt-get layer
# above; this layer upgrades/completes it from the CRAN repo added above.
RUN apt update; DEBIAN_FRONTEND="noninteractive" apt install -y --allow-unauthenticated r-base r-base-core r-recommended r-base-dev && rm -rf /var/lib/apt/lists/*

RUN mkdir -p /opt;
#Install Qualimap
RUN cd /opt && wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v${QUALIMAP_VER}.zip
RUN cd /opt && unzip qualimap_v${QUALIMAP_VER}.zip && rm qualimap_v${QUALIMAP_VER}.zip

# R packages: CRAN utilities plus Bioconductor packages used downstream.
RUN R -e 'install.packages(c("optparse","XML","BiocManager","remotes","RColorBrewer","tidyverse"), repos = "http://cran.r-project.org")'
RUN Rscript -e 'BiocManager::install(c("NOISeq","Repitools","Rsamtools","rtracklayer","GenomicFeatures"))'

# Expose Qualimap (unzipped under /opt) on PATH.
ENV PATH "$PATH:/opt/qualimap_v${QUALIMAP_VER}"
ENV PATH "$PATH:/usr/local/bin"

WORKDIR /data/
14 changes: 12 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

include { map_reads; make_report } from './modules/local/laava'
include { map_reads; make_report; hostgenect; bamqc } from './modules/local/laava'

NO_FILE = file("$projectDir/bin/NO_FILE")

Expand All @@ -11,6 +11,7 @@ workflow laava {
vector_fa
packaging_fa
host_fa
host_exons
repcap_name
vector_bed
vector_type
Expand All @@ -36,7 +37,13 @@ workflow laava {
.combine(max_allowed_missing_flanking)
.combine(flipflop_name)
.combine(flipflop_fa))

if (host_exons) {
hostgenect(map_reads.out.bam.combine(host_exons))
hostct=hostgenect.out
} else {
hostct=Channel.of(NO_FILE)
}
bamqc(map_reads.out.bam)
emit:
mapped_sam = map_reads.out.mapped_sam
mapped_bam = map_reads.out.mapped_bam
Expand All @@ -53,6 +60,8 @@ workflow laava {
sequence_error_tsv = make_report.out.sequence_error_tsv
flipflop_tsv = make_report.out.flipflop_tsv
rdata = make_report.out.rdata
qc=bamqc.out.qc
hostct=hostct
}

workflow {
Expand All @@ -74,6 +83,7 @@ workflow {
Channel.fromPath(params.vector_fa),
params.packaging_fa ? Channel.fromPath(params.packaging_fa) : Channel.of(NO_FILE),
params.host_fa ? Channel.fromPath(params.host_fa) : Channel.of(NO_FILE),
params.host_exons ? Channel.fromPath(params.host_exons) : false,
Channel.of(params.repcap_name),
Channel.fromPath(params.vector_bed),
Channel.of(params.vector_type),
Expand Down
30 changes: 27 additions & 3 deletions modules/local/laava.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process map_reads() {
label 'laava'
publishDir "$params.output", mode: "copy"

input:
Expand Down Expand Up @@ -26,10 +27,33 @@ process map_reads() {
"${packaging_fa_path}" "${host_fa_path}" "${repcap_name}"
"""
}


// Per-gene host read counts: runs bin/hostgenect.sh, which merges the exon
// BED, runs bedtools coverage on the BAM, and collapses the result with
// merge_bedcov.pl.
process hostgenect {
label 'laavasupp'
publishDir "${params.output}", mode: 'copy'
// sid: sample id; sbam: mapped BAM; exonbed: host exon BED file
input:
tuple val(sid),path(sbam),path(exonbed)
// Per-gene coverage table (ensembl id, symbol, read count).
output:
path("${sid}.bedtools.cov.txt")
script:
"""
hostgenect.sh -p ${sid} -e ${exonbed} ${sbam}
"""
}
// BAM-level QC: runs bin/bamqc.sh (fastqc, samtools flagstat/idxstats/
// stats/coverage, qualimap) and publishes everything under <output>/qc.
process bamqc {
label 'laavasupp'
publishDir "$params.output/qc", mode: 'copy'
// sid: sample id; bam: mapped BAM
input:
tuple val(sid),path(bam)
// Fix: output options must be comma-separated name:value pairs --
// "emit: qc optional true" is not valid directive syntax.
// optional:true because qualimap/fastqc outputs may be absent on failure.
output:
path("${sid}*"), emit: qc, optional: true
script:
"""
bamqc.sh -b ${bam} -p ${sid}
"""
}
process make_report() {
publishDir "$params.output", mode: "copy"
label 'laava'
publishDir "$params.output", mode: "copy"

input:
tuple val(sample_name),
Expand Down
54 changes: 37 additions & 17 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ manifest {

params {
// Workflow inputs
ui_source_type = 'file_source'
seq_reads_file = null
seq_reads_folder = null
vector_type = 'unspecified'
vector_bed = null
vector_fa = null
packaging_fa = null
Expand All @@ -29,31 +25,38 @@ params {
// Required for Form Bio workflows
output = 'output'

// Hosted Docker containers
container_repo = 'ghcr.io/formbio'
// use ':latest' for testing
container_version = '@sha256:ad2e6aa7249a712d28027617e3075a492e9a81a0c49dc50cd83d209a4ed98df8'
// For testing
citest=false
// use ":latest" for testing

containerrepo = 'ghcr.io/formbio'
container_version="@sha256:ad2e6aa7249a712d28027617e3075a492e9a81a0c49dc50cd83d209a4ed98df8"
}

process {
container = "${params.container_repo}/laava${params.container_version}"
cpus = 4
//shell = ['/bin/bash', '-euo', 'pipefail']

executor = 'google-batch'
executor = 'google-lifesciences' //'google-batch'
maxRetries = 3
maxErrors = '-1'
maxForks = 50
cache = 'lenient'
// Only has effect on cloud
machineType = 'e2-highmem-8'
disk = '500 GB'
//container = "${params.containerrepo}/laava${params.container_version}"
cpus = 4
//shell = ['/bin/bash', '-euo', 'pipefail']
withLabel: laava {
container = "${params.containerrepo}/laava${params.container_version}"
}
withLabel: laavasupp {
container = "${params.containerrepo}/laavasupp${params.container_version}"
}
}


// Execution environments

def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')

docker {
enabled = true
Expand All @@ -68,11 +71,30 @@ executor {

google {
region = 'us-central1'
//cloudprj = 'xx-sbx'
//project = "${params.cloudprj}"

batch {
bootDiskSize = '100.GB'
debug = true
sshDaemon = true
}

}

// Execution environments

def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')

docker {
enabled = true
}

executor {
queueSize = 50
name = 'google-batch'
submitRateLimit = '10sec'
pollInterval = '30 sec'
}

profiles {
Expand All @@ -82,8 +104,6 @@ profiles {
process.executor = 'local'
process.queue = 10
workDir = 'workflow-outputs/work'
process.container = "${params.container_repo}/laava:latest"

trace {
enabled = true
overwrite = true
Expand All @@ -106,11 +126,11 @@ profiles {
file = 'workflow-outputs/nextflow_logs/pipeline_dag_${trace_timestamp}.svg'
}
}

// Cloud platforms
googlebatch {
process {
executor = 'google-batch'
}
}
}

1 change: 1 addition & 0 deletions params-local-sc-no-ff.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"vector_fa": "test/samples/sc.construct.fasta",
"packaging_fa": "test/fasta/packaging.fa",
"host_fa": "test/fasta/hg38.chr19trunc-chrM.fa",
"host_exons": "test/fasta/exons.bed",
"repcap_name": "pRep2Cap9",
"target_gap_threshold": 200,
"max_allowed_outside_vector": 100,
Expand Down
8 changes: 8 additions & 0 deletions src/calculate_rdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ df.read1 <- x.all.read %>%
df.read1 <- df.read1[order(-df.read1$freq), ]


# NOTE(review): this added hunk recomputes total_read_count.all and df.read1
# with the same group_by/summarise/mutate pipeline that appears immediately
# above it in this file -- it looks like a duplicated merge hunk; confirm and
# remove one copy.
# Effective read counts per assigned_type, as a percentage of all effective
# reads, sorted by descending frequency.
total_read_count.all <- sum(x.all.read$effective_count) #dim(x.all.read)[1]
df.read1 <- x.all.read %>%
group_by(assigned_type) %>%
summarise(e_count = sum(effective_count)) %>%
mutate(freq = round(e_count * 100 / total_read_count.all, 2))
df.read1 <- df.read1[order(-df.read1$freq), ]


# ----------------------------------------------------------
# Stats and plot for flip/flop analysis (if available)
# ----------------------------------------------------------
Expand Down
Loading
0