Commit 5971b3d2 authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

correct bedgraph rule - change some dirnames

parent 3ee37bc7
......@@ -461,7 +461,7 @@ if config['mark_duplicates']['do']:
mark_duplicates_output = os.path.join(analysis_dir, "03-Deduplication/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions))
mark_duplicates_metrics = os.path.join(analysis_dir, "03-Deduplication/{SAMPLE}_{REF}_sort_dedup.txt")
mark_duplicates_remove = dedup
mark_duplicates_log_std = os.path.join(analysis_dir, "03-Deduplication/logs/{SAMPLE}_{REF}}_sort_dedup.out")
mark_duplicates_log_std = os.path.join(analysis_dir, "03-Deduplication/logs/{SAMPLE}_{REF}_sort_dedup.out")
mark_duplicates_log_err = os.path.join(analysis_dir, "03-Deduplication/logs/{SAMPLE}_{REF}_sort_dedup.err")
mark_duplicates_tmpdir = config['tmpdir']
#final_output.extend(expand(mark_duplicates_output, SAMPLE=samples, REF=ref))
......@@ -522,12 +522,13 @@ if config["bamCoverage"]["do"]:
#----------------------------------
# Estimate Library Complexity
#----------------------------------
lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions)
lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt")
lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out")
lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err")
final_output.extend(expand(lib_complexity_metrics, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "EstimateLibraryComplexity.rules")
if paired:
lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions)
lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt")
lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out")
lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err")
final_output.extend(expand(lib_complexity_metrics, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "EstimateLibraryComplexity.rules")
#----------------------------------
# plot Fingerprint
......@@ -667,7 +668,7 @@ if config["macs2"]["do"]:
macs2_options += " --nomodel "
macs2_shift_file = os.path.join(analysis_dir, "05-QC/PhantomPeakQualTools/{{SAMPLE}}_{}_sort{}_spp.out".format(ref, biasedRegions))
else:
macs2_shift_file = "Empty"
macs2_shift_file = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions)
if paired :
macs2_pe_mode = "no"
else:
......@@ -675,7 +676,8 @@ if config["macs2"]["do"]:
macs2_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions)
macs2_control = INPUTtoIP
macs2_log = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/logs/{{SAMPLE}}.out".format(model_dir))
macs2_log_out = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/logs/{{SAMPLE}}.out".format(model_dir))
macs2_log_err = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/logs/{{SAMPLE}}.err".format(model_dir))
macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, model))
macs2_output_prefix = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/{{SAMPLE}}".format(model_dir))
......@@ -724,7 +726,7 @@ if config["seacr"]["do"]:
seacr_output = os.path.join(analysis_dir, "06-PeakCalling/seacr/{{SAMPLE}}.{}.bed".format(config["seacr"]["threshold"]))
seacr_logs_std = os.path.join(analysis_dir, "06-PeakCalling/seacr/logs/{SAMPLE}_seacr_calling.out")
seacr_logs_err = os.path.join(analysis_dir, "06-PeakCalling/seacr/logs/{SAMPLE}_seacr_calling.err")
final_output.extend(expand(seacr_output, SAMPLE=IP_ALL))
final_output.extend(expand(seacr_output, SAMPLE=ALL_IP_PC))
include: os.path.join(RULES, "seacr.rules")
......@@ -732,20 +734,21 @@ if config["seacr"]["do"]:
# Peak Calling metrics
#----------------------------------
def stats_pc_input(wildcards):
if wildcards.CALLER == "macs2":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/%s/{IP_REP}_peaks.{{MOD}}Peak" % (model_dir)), IP_REP=IP_ALL)
elif wildcards.CALLER == "seacr":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/{IP_REP}.{{MOD}}.bed"), IP_REP=IP_ALL)
if config["macs2"]["do"] or config["seacr"]["do"] :
def stats_pc_input(wildcards):
if wildcards.CALLER == "macs2":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/%s/{IP_REP}_peaks.{{MOD}}Peak" % (model_dir)), IP_REP=ALL_IP_PC)
elif wildcards.CALLER == "seacr":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/{IP_REP}.{{MOD}}.bed"), IP_REP=ALL_IP_PC)
stats_peakCalling_input = stats_pc_input
stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics.out")
stats_peakCalling_marks = marks
stats_peakCalling_conds = conds
stats_peakCalling_rep = rep_flag
stats_peakCalling_log = os.path.join(analysis_dir, "06-PeakCalling/{CALLER}/{MOD}_Peaks_metrics.out")
include: os.path.join(RULES, "stats_peakCalling.rules")
final_output.extend(expand(stats_peakCalling_csv, zip, CALLER=peak_caller, MOD=mod))
stats_peakCalling_input = stats_pc_input
stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out")
stats_peakCalling_marks = marks
stats_peakCalling_conds = conds
stats_peakCalling_rep = rep_flag
stats_peakCalling_log = os.path.join(analysis_dir, "06-PeakCalling/{CALLER}/{MOD}_Peaks_metrics.out")
include: os.path.join(RULES, "stats_peakCalling.rules")
final_output.extend(expand(stats_peakCalling_csv, zip, CALLER=peak_caller, MOD=mod))
#----------------------------------
......@@ -930,6 +933,8 @@ if len(conds) > 1 and config["differential_analysis"]["do"]:
method = config["differential_analysis"]["method"]
norm = config["differential_analysis"]["normalisation"]
if config["differential_analysis"]["batch"] :
norm += "_batch"
chipflowr_init_input = feature_counts_output_count
chipflowr_init_conds = conds
......@@ -968,8 +973,10 @@ if config["igv_session"]["do"]:
if len(IP_REP) > 1 and config["macs2"]["do"]:
if config["intersectionApproach"]["do"]:
igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model, IP_IDR=IP_REP)
else:
elif model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]:
igv_session_input += expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP)
else :
igv_session_input += expand(macs2_output, SAMPLE=IP_ALL)
elif len(IP_REP) > 1 and config["seacr"]["do"]:
igv_session_input += expand(intersectionApproach_output, CALLER="seacr_" + config["seacr"]["threshold"], IP_IDR=IP_REP)
if len(IP_ALL) > 1 and config["macs2"]["do"]:
......@@ -994,6 +1001,8 @@ if config["igv_session"]["do"]:
#----------------------------------
try : model_dir
except NameError : model_dir = "seacr_" + config["seacr"]["threshold"]
if not config['seacr']['do'] and not config['macs2']['do']:
model_dir = "multiqc"
multiqc_input = final_output
multiqc_input_dir = analysis_dir
......
......@@ -122,13 +122,13 @@ sp:
deeptools/plotFingerprintOutRawCounts:
fn: '*_fingerprint_rawcounts.txt'
idr_metrics:
fn: 'IDR_metrics.out'
fn: 'IDR_metrics_mqc.out'
macs2_peaks_metrics:
fn: 'macs2*_Peaks_metrics.out'
fn: 'macs2*_Peaks_metrics_mqc.out'
seacr_peaks_metrics:
fn: 'seacr*_Peaks_metrics.out'
fn: 'seacr*_Peaks_metrics_mqc.out'
spikes_metrics:
fn: 'Spikes_metrics.out'
fn: 'Spikes_metrics_mqc.out'
frip_scores:
fn: 'frip_metrics_mqc.out'
......
......@@ -37,16 +37,18 @@ rule bedgraph:
out = bedgraph_logs
shell:
"""
temp_file=$(mktemp)
#bedtools bamtobed {params.options} -i {input.bam} | awk '$1==$4 && $6-$2 < 1000 {{print $0}}' - | cut -f 1,2,6 | sort -k1,1 -k2,2n -k3,3n > $temp_file
bedtools bamtobed {params.options} -i {input.bam} | sort -k1,1 -k2,2n -k3,3n > $temp_file
temp_file1=$(mktemp)
temp_file2=$(mktemp)
#sort BAM by read name
samtools sort -n -o $temp_file1 {input.bam}
bedtools bamtobed {params.options} -i $temp_file1 | awk '{{if ($2 != -1) {{print $0}} }}' | sort -k1,1 -k2,2n -k3,3n > $temp_file2
if [[ -s {input.scaleFactor} && {input.scaleFactor} == *"_scaleFactor.txt"* ]]
then
S=$(cat {input.scaleFactor})
bedtools genomecov -bg -i $temp_file -g {params.genome} -scale $S > {output} 2> {log.out}
bedtools genomecov -bg -i $temp_file2 -g {params.genome} -scale $S > {output} 2> {log.out}
else
bedtools genomecov -bg -i $temp_file -g {params.genome} > {output} 2> {log.out}
bedtools genomecov -bg -i $temp_file2 -g {params.genome} > {output} 2> {log.out}
fi
"""
......@@ -85,7 +85,7 @@ rule chipflowr_init:
out.write("pAdjustMethod <- '%s'\n" % (params.padj))
out.write("alpha <- %s\n" % (params.alpha))
if params.batch:
out.write("batch <- '%s'\n" % (params.batch))
out.write("batch <- %s\n" % (params.batch))
else:
out.write("batch <- NULL")
......
#########################################################################
# ePeak: Standardize and reproducible ChIP-seq analysis from raw #
# data to differential analysis #
# Authors: Rachel Legendre, Maelle Daunesse #
# Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. #
# #
# This file is part of ePeak workflow. #
# #
# ePeak is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# ePeak is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details . #
# #
# You should have received a copy of the GNU General Public License #
# along with ePeak (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
rule compute_frip:
input:
bam = compute_frip_bam,
bed = compute_frip_peaks
output:
compute_frip_csv
params:
paired = compute_frip_paired
singularity:
"epeak.img"
log:
out = compute_frip_log
shell:
"""
temp_file=$(mktemp)
if [[ {params.paired} == "True" ]]
then
#write reads as bedpe bed file to get fragments instead of reads
bedtools bamtobed -bedpe -i {input.bam} > $temp_file
else
bedtools bamtobed -i {input.bam} > $temp_file
fi
#compute number of fragments overlap peaks
mapReads=$(bedtools intersect -a $temp_file -b {input.bed} -wa -u | wc -l)
#compute number of mapped reads
totReads=$(samtools view -c -F 260 {input.bam})
#compute FRiP
totReads/mapReads
# plot_type: 'generalstats'
Sample FRiP
H3K27ac_shCtrl 1.06
H3K27ac_shUbc9 1.14
Klf4_shCtrl 1.34
Klf4_shUbc9 1.19
"""
File mode changed from 100644 to 100755
......@@ -26,16 +26,17 @@
rule macs2:
input:
bam = macs2_input_bam,
control = macs2_control
control = macs2_control,
shift_file = macs2_shift_file
params:
gsize = config["macs2"]["genomeSize"],
options = macs2_options,
pe_mode = macs2_pe_mode,
prefix = macs2_output_prefix,
shift_file = macs2_shift_file,
read_length = 146
log:
out = macs2_log
out = macs2_log_out,
err = macs2_log_err
output:
macs2_output
singularity:
......@@ -46,20 +47,20 @@ rule macs2:
#if [[ "{input.control}" == "NA" ]] ; then input="" ; else input="{input.control}" ; fi
if [[ {params.shift_file} != "Empty" ]] ; then
if [[ -e {params.shift_file} ]] ; then
shift=$(awk '{{print $3}}' {params.shift_file} | awk -F ',' '{{if ($1=={params.read_length}){{print $2}} else {{print $1}} }}')
if [[ {input.shift_file} == "*_spp.out" ]] ; then
if [[ -e {input.shift_file} ]] ; then
shift=$(awk '{{print $3}}' {input.shift_file} | awk -F ',' '{{if ($1=={params.read_length}){{print $2}} else {{print $1}} }}')
if [[ {params.pe_mode} == "yes" ]] ; then
macs2 callpeak -t {input.bam} -c {input.control} -f BAMPE -g {params.gsize} -n {params.prefix} {params.options} --extsize ${{shift}} 2> {log.out}
macs2 callpeak -t {input.bam} -c {input.control} -f BAMPE -g {params.gsize} -n {params.prefix} {params.options} --extsize ${{shift}} > {log.out} 2> {log.err}
else
macs2 callpeak -t {input.bam} -c {input.control} -f BAM -g {params.gsize} -n {params.prefix} {params.options} --extsize ${{shift}} 2> {log.out}
macs2 callpeak -t {input.bam} -c {input.control} -f BAM -g {params.gsize} -n {params.prefix} {params.options} --extsize ${{shift}}> {log.out} 2> {log.err}
fi
fi
else
if [[ {params.pe_mode} == "yes" ]] ; then
macs2 callpeak -t {input.bam} -c {input.control} -f BAMPE -g {params.gsize} -n {params.prefix} {params.options} 2> {log.out}
macs2 callpeak -t {input.bam} -c {input.control} -f BAMPE -g {params.gsize} -n {params.prefix} {params.options} > {log.out} 2> {log.err}
else
macs2 callpeak -t {input.bam} -c {input.control} -f BAM -g {params.gsize} -n {params.prefix} {params.options} 2> {log.out}
macs2 callpeak -t {input.bam} -c {input.control} -f BAM -g {params.gsize} -n {params.prefix} {params.options} > {log.out} 2> {log.err}
fi
fi
"""
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment