Commit 9b580987 authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

Merge branch 'dev' into 'master'

Dev to master

See merge request !6
parents 3ad41bd9 8cf36710
......@@ -522,12 +522,13 @@ if config["bamCoverage"]["do"]:
#----------------------------------
# Estimate Library Complexity
#----------------------------------
lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions)
lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt")
lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out")
lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err")
final_output.extend(expand(lib_complexity_metrics, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "EstimateLibraryComplexity.rules")
if paired:
lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions)
lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt")
lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out")
lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err")
final_output.extend(expand(lib_complexity_metrics, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "EstimateLibraryComplexity.rules")
#----------------------------------
# plot Fingerprint
......@@ -725,7 +726,7 @@ if config["seacr"]["do"]:
seacr_output = os.path.join(analysis_dir, "06-PeakCalling/seacr/{{SAMPLE}}.{}.bed".format(config["seacr"]["threshold"]))
seacr_logs_std = os.path.join(analysis_dir, "06-PeakCalling/seacr/logs/{SAMPLE}_seacr_calling.out")
seacr_logs_err = os.path.join(analysis_dir, "06-PeakCalling/seacr/logs/{SAMPLE}_seacr_calling.err")
final_output.extend(expand(seacr_output, SAMPLE=IP_ALL))
final_output.extend(expand(seacr_output, SAMPLE=ALL_IP_PC))
include: os.path.join(RULES, "seacr.rules")
......@@ -736,9 +737,9 @@ if config["seacr"]["do"]:
if config["macs2"]["do"] or config["seacr"]["do"] :
def stats_pc_input(wildcards):
if wildcards.CALLER == "macs2":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/%s/{IP_REP}_peaks.{{MOD}}Peak" % (model_dir)), IP_REP=IP_ALL)
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/%s/{IP_REP}_peaks.{{MOD}}Peak" % (model_dir)), IP_REP=ALL_IP_PC)
elif wildcards.CALLER == "seacr":
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/{IP_REP}.{{MOD}}.bed"), IP_REP=IP_ALL)
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/{IP_REP}.{{MOD}}.bed"), IP_REP=ALL_IP_PC)
stats_peakCalling_input = stats_pc_input
stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out")
......@@ -828,6 +829,17 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co
final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP))
#----------------------------------
# Compute IA metrics
#----------------------------------
if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"]:
stats_IA_input = expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP)
stats_IA_csv = os.path.join(analysis_dir, "IA_metrics.out")
stats_IA_log = os.path.join(analysis_dir, "08-ReproduciblePeaks/{}/logs/IA_metrics.out".format(model_dir))
include: os.path.join(RULES, "stats_IA.rules")
final_output.extend([stats_IA_csv])
#----------------------------------
# Compute IDR metrics
#----------------------------------
......@@ -932,6 +944,8 @@ if len(conds) > 1 and config["differential_analysis"]["do"]:
method = config["differential_analysis"]["method"]
norm = config["differential_analysis"]["normalisation"]
if config["differential_analysis"]["batch"] :
norm += "_batch"
chipflowr_init_input = feature_counts_output_count
chipflowr_init_conds = conds
......@@ -998,6 +1012,8 @@ if config["igv_session"]["do"]:
#----------------------------------
try : model_dir
except NameError : model_dir = "seacr_" + config["seacr"]["threshold"]
if not config['seacr']['do'] and not config['macs2']['do']:
model_dir = "multiqc"
multiqc_input = final_output
multiqc_input_dir = analysis_dir
......
......@@ -128,6 +128,8 @@ sp:
fn: '*_fingerprint_rawcounts.txt'
idr_metrics:
fn: 'IDR_metrics.out'
ia_metrics:
fn: 'IA_metrics.out'
macs2_peaks_metrics:
fn: 'macs2*_Peaks_metrics.out'
seacr_peaks_metrics:
......@@ -199,6 +201,24 @@ custom_data:
title: 'Score'
description: 'If RR and SCR are ideal, score is equal to 1. If score is -1, results are concerning.'
format: '{:,.0f}'
ia_metrics:
id: "ia_metrics"
section_name: 'Intersection Approach metrics'
plot_type: 'table'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
pconfig:
id: 'ia_metrics'
namespace: 'ia_metrics'
headers:
Sample:
title: 'Sample name'
description: 'Sample Name'
Peaks:
title: 'Number of peaks'
description: 'Number of peaks'
macs2_peaks_metrics:
id: 'macs2_peaks_metrics'
section_name: 'Number of peaks with MACS2'
......
......@@ -37,16 +37,18 @@ rule bedgraph:
out = bedgraph_logs
shell:
"""
temp_file=$(mktemp)
#bedtools bamtobed {params.options} -i {input.bam} | awk '$1==$4 && $6-$2 < 1000 {{print $0}}' - | cut -f 1,2,6 | sort -k1,1 -k2,2n -k3,3n > $temp_file
bedtools bamtobed {params.options} -i {input.bam} | sort -k1,1 -k2,2n -k3,3n > $temp_file
temp_file1=$(mktemp)
temp_file2=$(mktemp)
#sort BAM by read name
samtools sort -n -o $temp_file1 {input.bam}
bedtools bamtobed {params.options} -i $temp_file1 | awk '{{if ($2 != -1) {{print $0}} }}' | sort -k1,1 -k2,2n -k3,3n > $temp_file2
if [[ -s {input.scaleFactor} && {input.scaleFactor} == *"_scaleFactor.txt"* ]]
then
S=$(cat {input.scaleFactor})
bedtools genomecov -bg -i $temp_file -g {params.genome} -scale $S > {output} 2> {log.out}
bedtools genomecov -bg -i $temp_file2 -g {params.genome} -scale $S > {output} 2> {log.out}
else
bedtools genomecov -bg -i $temp_file -g {params.genome} > {output} 2> {log.out}
bedtools genomecov -bg -i $temp_file2 -g {params.genome} > {output} 2> {log.out}
fi
"""
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#########################################################################
# ePeak: Standardize and reproducible ChIP-seq analysis from raw #
# data to differential analysis #
# Authors: Rachel Legendre, Maelle Daunesse #
# Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. #
# #
# This file is part of ePeak workflow. #
# #
# ePeak is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# ePeak is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details . #
# #
# You should have received a copy of the GNU General Public License #
# along with ePeak (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
rule stats_IA:
input:
inputs = stats_IA_input
output:
stats_IA_csv
log:
out = stats_IA_log
run:
import pandas as pd
import os.path
from collections import OrderedDict
inputs = [os.path.realpath(f) for f in input['inputs']]
output = os.path.realpath(output[0])
#initialize dict for store all metrics
d = OrderedDict()
for file in inputs:
name = (os.path.basename(file)).split("_vs")[0]
nb_peak = sum(1 for line in open(file))
d[name] = [nb_peak]
#format dataframe
df = pd.DataFrame(data=d)
df = df.transpose()
df.reset_index(inplace=True)
# write dataframe in output file
with open(output, 'w') as f:
f.write("# plot_type: 'table'\n")
df.to_csv(output, mode='a',sep="\t", index=False, header=['Sample', 'Peaks'])
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment