Commit 732d6674 authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

fix error in IA when nomodel

parent 8d2ddebf
......@@ -459,10 +459,10 @@ if config['mark_duplicates']['do']:
biasedRegions_dir = os.path.join(analysis_dir, "03-Deduplication")
mark_duplicates_input = bowtie2_mapping_sort
mark_duplicates_output = os.path.join(analysis_dir, "03-Deduplication/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions))
mark_duplicates_metrics = os.path.join(analysis_dir, "03-Deduplication/{{SAMPLE}}_{{REF}}_sort_dedup.txt")
mark_duplicates_metrics = os.path.join(analysis_dir, "03-Deduplication/{SAMPLE}_{REF}_sort_dedup.txt")
mark_duplicates_remove = dedup
mark_duplicates_log_std = os.path.join(analysis_dir, "03-Deduplication/logs/{{SAMPLE}}_{{REF}}_sort_dedup.out")
mark_duplicates_log_err = os.path.join(analysis_dir, "03-Deduplication/logs/{{SAMPLE}}_{{REF}}_sort_dedup.err")
# Stdout log for mark_duplicates; wildcard braces must be balanced like the sibling .err log path.
mark_duplicates_log_std = os.path.join(analysis_dir, "03-Deduplication/logs/{SAMPLE}_{REF}_sort_dedup.out")
mark_duplicates_log_err = os.path.join(analysis_dir, "03-Deduplication/logs/{SAMPLE}_{REF}_sort_dedup.err")
mark_duplicates_tmpdir = config['tmpdir']
#final_output.extend(expand(mark_duplicates_output, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "mark_duplicates.rules")
......@@ -477,7 +477,7 @@ if config["design"]["spike"]:
# counting on spikes
spikes_counting_input = expand(os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions)), SAMPLE=samples, REF="spikes")
spikes_counting_output_json = os.path.join(analysis_dir, "09-CountMatrix/Spikes_count.json")
spikes_counting_output = os.path.join(analysis_dir, "Spikes_metrics_mqc.out")
spikes_counting_output = os.path.join(analysis_dir, "Spikes_metrics.out")
spikes_counting_log = os.path.join(analysis_dir, "03-Deduplication/logs/Spikes_metrics.out")
final_output.extend([spikes_counting_output_json])
include: os.path.join(RULES, "spikes_counting.rules")
......@@ -739,7 +739,7 @@ def stats_pc_input(wildcards):
return expand(os.path.join(analysis_dir, "06-PeakCalling/{{CALLER}}/{IP_REP}.{{MOD}}.bed"), IP_REP=IP_ALL)
stats_peakCalling_input = stats_pc_input
stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out")
stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics.out")
stats_peakCalling_marks = marks
stats_peakCalling_conds = conds
stats_peakCalling_rep = rep_flag
......@@ -773,7 +773,7 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]:
#----------------------------------
CALL_MOD = []
if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"]:
if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]:
CALL_MOD += ["macs2_" + model_dir]
# Select IDR peaks
def IDR_input_rep(wildcards):
......@@ -809,7 +809,7 @@ if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["
CALL_MOD += ["macs2_" + model_dir]
def IA_input(wildcards):
if wildcards.CALLER == "macs2_"+ config["macs2"]["mode_choice"]:
if wildcards.CALLER == "macs2_"+ model_dir:
return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}/%s/{{IP_IDR}}_{CASE}_peaks.{MOD}Peak" % (model_dir)), CALLER="macs2", CASE=rep, MOD=config["macs2"]["mode_choice"])
elif wildcards.CALLER == "seacr_"+ config["seacr"]["threshold"]:
return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CALLER="seacr", CASE=rep, MOD=config["seacr"]["threshold"])
......@@ -839,7 +839,7 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]:
metrics_peaks_conds = conds
metrics_peaks_rep = rep_flag
metrics_peaks_logs = os.path.join(analysis_dir, "07-IDR/macs2/{}/logs/IDR_metrics.out".format(model_dir))
metrics_peaks_output = os.path.join(analysis_dir, "IDR_metrics_mqc.out")
metrics_peaks_output = os.path.join(analysis_dir, "IDR_metrics.out")
include: os.path.join(RULES, "metrics_peaks.rules")
final_output.extend([metrics_peaks_output])
......@@ -1017,7 +1017,7 @@ onsuccess:
# copy metrics json in the corresponding multiQC output when you are in exploratory mode
import shutil
import glob
for file in glob.glob(os.path.join(analysis_dir, "*metrics_mqc.out")):
for file in glob.glob(os.path.join(analysis_dir, "*metrics.out")):
shutil.copy2(file, os.path.join(analysis_dir, config['multiqc']['output-directory'] + "/%s" % (model_dir)))
# move cluster log files
......
......@@ -56,13 +56,116 @@ report_header_info:
# PLEASE DON'T CHANGE THE FOLLOWING CONFIG
#-------------------------------------------------------------------------------
top_modules:
- 'fastqc'
- 'cutadapt'
- 'bowtie2'
- 'picard'
- 'deeptools'
- 'phantompeakqualtools'
- 'peaks_metrics'
- 'idr_metrics'
- 'spikes_metrics'
- 'feature_counts'
# Cleaning options for sample names. Typically, sample names are detected
# from an input filename. If any of these strings are found, they and any
# text to their right will be discarded.
# For example - file1.fq.gz_trimmed.bam_deduplicated_fastqc.zip
# would be cleaned to 'file1'
# Two options here - fn_clean_exts will replace the defaults,
# extra_fn_clean_exts will append to the defaults
extra_fn_clean_exts:
- .gz
- .fastq
- _trim
- _mapping.e
- _sort
- _sort_dedup_biasedRegions
- _sort_dedup
- _sort_biasedRegions
- _R1
- _R2
- type: remove
pattern: '.sorted'
- type: regex
pattern: '^Sample_\d+'
# Ignore these files / directories / paths when searching for logs
fn_ignore_files:
- .DS_Store
fn_ignore_dirs:
- .snakemake
- cluster_logs
- logs
fn_ignore_paths:
- 03-Deduplication/*spikes*
- 02-Mapping/*_spike*
- slurm*
# Overwrite module filename search patterns. See multiqc/utils/search_patterns.yaml
# for the defaults. Remove a default by setting it to null.
sp:
cutadapt:
fn: '*trim.txt'
phantompeakqualtools/out:
fn: '*_spp.out'
picard/markdups:
fn: '*_dedup.txt'
picard/insertsize:
fn: '*_fragmentSizeDistribution.txt'
deeptools/plotFingerprintOutRawCounts:
fn: '*_fingerprint_rawcounts.txt'
idr_metrics:
fn: 'IDR_metrics.out'
macs2_peaks_metrics:
fn: 'macs2*_Peaks_metrics.out'
seacr_peaks_metrics:
fn: 'seacr*_Peaks_metrics.out'
spikes_metrics:
fn: 'Spikes_metrics.out'
frip_scores:
fn: 'frip_metrics_mqc.out'
custom_data:
spikes_metrics:
id: 'spikes_metrics'
section_name: 'Spikes metrics'
section_description: 'Number and percentage of mapped reads reported for the spike-in, per sample'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
plot_type: 'table'
pconfig:
id: 'spikes_metrics'
namespace: 'spikes_metrics'
headers:
Spike:
title: 'Sample Name'
description: 'Sample Name'
MappedReads:
title: 'Mapped Reads'
description: 'Number of mapped reads'
Percent:
title: 'Percent'
description: 'Percent'
idr_metrics:
id: 'idr_metrics'
id: "idr_metrics"
section_name: 'IDR metrics'
plot_type: 'table'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
pconfig:
namespace: 'IDR Metrics'
id: 'idr_metrics'
namespace: 'idr_metrics'
headers:
cond:
title: 'Condition'
......@@ -91,12 +194,16 @@ custom_data:
title: 'Score'
description: 'If RR and SCR are ideal, score is equal to 1. If score is -1, results are concerning.'
format: '{:,.0f}'
peaks_metrics:
id: 'Peaks metrics'
section_name: 'Peaks metrics'
macs2_peaks_metrics:
id: 'macs2_peaks_metrics'
section_name: 'Number of peaks with MACS2'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
plot_type: 'table'
pconfig:
namespace: 'Number of peaks with MACS2'
id: 'macs2_peaks_metrics'
namespace: 'macs2_peaks_metrics'
headers:
Sample:
title: 'Sample name'
......@@ -104,23 +211,30 @@ custom_data:
Peaks:
title: 'Number of peaks'
description: 'Number of peaks'
spikes_metrics:
id: 'Spikes metrics'
section_name: 'Spikes metrics'
seacr_peaks_metrics:
id: 'seacr_peaks_metrics'
section_name: 'Number of peaks with SEACR'
parent_id: "peak_section"
parent_name: "Peaks metrics"
parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
plot_type: 'table'
pconfig:
namespace: 'Spikes metrics'
id: 'seacr_peaks_metrics'
namespace: 'seacr_peaks_metrics'
headers:
Spike:
title: 'Sample Name'
Sample:
title: 'Sample name'
description: 'Sample Name'
MappedReads:
title: 'Mapped Reads'
description: 'Number of mapped reads'
Percent:
title: 'Percent'
description: 'Percent'
Peaks:
title: 'Number of peaks'
description: 'Number of peaks'
table_columns_placement:
idr_metrics:
RR: 1300
SCR: 1400
Score: 1500
table_cond_formatting_rules:
RR:
......@@ -157,21 +271,6 @@ table_cond_formatting_rules:
fail:
- lt: 0.5
top_modules:
- 'fastqc'
- 'cutadapt'
- 'bowtie2'
- 'picard'
- 'deeptools'
- 'phantompeakqualtools'
- 'peaks_metrics'
- 'idr_metrics'
- 'spikes_metrics'
- 'feature_counts'
section_comments:
idr_metrics: 'Statistics about Irreproducible Discovery Rate (IDR) (see https://www.encodeproject.org/data-standards/terms/#concordance for more information) '
......@@ -186,37 +285,7 @@ data_dir_name: multiqc_data
# Whether to create the parsed data directory in addition to the report
make_data_dir: True
# Cleaning options for sample names. Typically, sample names are detected
# from an input filename. If any of these strings are found, they and any
# text to their right will be discarded.
# For example - file1.fq.gz_trimmed.bam_deduplicated_fastqc.zip
# would be cleaned to 'file1'
# Two options here - fn_clean_exts will replace the defaults,
# extra_fn_clean_exts will append to the defaults
extra_fn_clean_exts:
- .gz
- .fastq
- _trim
- _mapping.e
- _sort
- _sort_dedup_NoBlacklist
- _sort_dedup
- _R1
- _R2
- type: remove
pattern: '.sorted'
- type: regex
pattern: '^Sample_\d+'
# Ignore these files / directories / paths when searching for logs
fn_ignore_files:
- .DS_Store
fn_ignore_dirs:
- annoying_dirname
fn_ignore_paths:
- '*/path/to/*_files/'
# Ignore files larger than this when searching for logs (bytes)
log_filesize_limit: 5000000
......@@ -240,29 +309,6 @@ plots_flat_numseries: 100 # If neither of the above, use flat if > this n
num_datasets_plot_limit: 50 # If interactive, don't plot on load if > this number of datasets
max_table_rows: 500 # Swap tables for a beeswarm plot above this
# Overwrite module filename search patterns. See multiqc/utils/search_patterns.yaml
# for the defaults. Remove a default by setting it to null.
sp:
cutadapt:
fn: '*trim.txt'
phantompeakqualtools/out:
fn: '*_spp.out'
picard/markdups:
fn: '*_dedup.txt'
picard/insertsize:
fn: '*_fragmentSizeDistribution.txt'
deeptools/plotFingerprintOutRawCounts:
fn: '*_fingerprint_rawcounts.txt'
idr_metrics:
fn: 'IDR_metrics_mqc.out'
peaks_metrics:
fn: 'Peaks_metrics_mqc.out'
spikes_metrics:
fn: 'Spikes_metrics_mqc.out'
frip_scores:
fn: 'frip_metrics_mqc.out'
# Overwrite the defaults of which table columns are visible by default
table_columns_visible:
......
......@@ -55,4 +55,11 @@ rule compute_frip:
#compute FRiP
totReads/mapReads
# plot_type: 'generalstats'
Sample FRiP
H3K27ac_shCtrl 1.06
H3K27ac_shUbc9 1.14
Klf4_shCtrl 1.34
Klf4_shUbc9 1.19
"""
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment