Commit 37493de5 authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

add IA metrics issue #7

parent 5971b3d2
......@@ -829,6 +829,17 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co
final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP))
#----------------------------------
# Compute IA metrics
#----------------------------------
# IA metrics are computed under the same condition that registers the
# intersection-approach peak files: MACS2 run in broad mode, or the
# intersection approach explicitly enabled in the config.
if (
    (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"])
    or config["intersectionApproach"]["do"]
):
    # Names read by the stats_IA rule (input, output and log respectively).
    stats_IA_csv = os.path.join(analysis_dir, "IA_metrics.out")
    stats_IA_log = os.path.join(
        analysis_dir,
        "08-ReproduciblePeaks/{}/logs/IA_metrics.out".format(model_dir),
    )
    stats_IA_input = expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP)
    include: os.path.join(RULES, "stats_IA.rules")
    # Request the metrics table as a final target of the workflow.
    final_output.append(stats_IA_csv)
#----------------------------------
# Compute IDR metrics
#----------------------------------
......
......@@ -88,6 +88,12 @@ extra_fn_clean_exts:
- _sort_biasedRegions
- _R1
- _R2
- _1
- _2
- _mm10
- _mm9
- _hg19
- _hg38
- type: remove
pattern: '.sorted'
- type: regex
......@@ -100,7 +106,6 @@ fn_ignore_files:
fn_ignore_dirs:
- .snakemake
- cluster_logs
- logs
fn_ignore_paths:
- 03-Deduplication/*spikes*
......@@ -122,13 +127,15 @@ sp:
deeptools/plotFingerprintOutRawCounts:
fn: '*_fingerprint_rawcounts.txt'
idr_metrics:
fn: 'IDR_metrics_mqc.out'
fn: 'IDR_metrics.out'
ia_metrics:
fn: 'IA_metrics.out'
macs2_peaks_metrics:
fn: 'macs2*_Peaks_metrics_mqc.out'
fn: 'macs2*_Peaks_metrics.out'
seacr_peaks_metrics:
fn: 'seacr*_Peaks_metrics_mqc.out'
fn: 'seacr*_Peaks_metrics.out'
spikes_metrics:
fn: 'Spikes_metrics_mqc.out'
fn: 'Spikes_metrics.out'
frip_scores:
fn: 'frip_metrics_mqc.out'
......@@ -194,6 +201,24 @@ custom_data:
title: 'Score'
description: 'If RR and SCR are ideal, score is equal to 1. If score is -1, results are concerning.'
format: '{:,.0f}'
ia_metrics:
id: "ia_metrics"
section_name: 'Intersection Approach metrics'
plot_type: 'table'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
pconfig:
id: 'ia_metrics'
namespace: 'ia_metrics'
headers:
Sample:
title: 'Sample name'
description: 'Sample Name'
Peaks:
title: 'Number of peaks'
description: 'Number of peaks'
macs2_peaks_metrics:
id: 'macs2_peaks_metrics'
section_name: 'Number of peaks with MACS2'
......
#########################################################################
# ePeak: Standardized and reproducible ChIP-seq analysis from raw #
# data to differential analysis #
# Authors: Rachel Legendre, Maelle Daunesse #
# Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. #
# #
# This file is part of ePeak workflow. #
# #
# ePeak is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# ePeak is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with ePeak (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
rule stats_IA:
input:
inputs = stats_IA_input
output:
stats_IA_csv
log:
out = stats_IA_log
run:
import pandas as pd
import os.path
from collections import OrderedDict
inputs = [os.path.realpath(f) for f in input['inputs']]
output = os.path.realpath(output[0])
#initialize dict for store all metrics
d = OrderedDict()
for file in inputs:
name = (os.path.basename(file)).split("_vs")[0]
nb_peak = sum(1 for line in open(file))
d[name] = [nb_peak]
#format dataframe
df = pd.DataFrame(data=d)
df = df.transpose()
df.reset_index(inplace=True)
# write dataframe in output file
with open(output, 'w') as f:
f.write("# plot_type: 'table'\n")
df.to_csv(output, mode='a',sep="\t", index=False, header=['Sample', 'Peaks'])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment