multiqc_config.yaml 11.2 KB
Newer Older
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
1
#########################################################################
2
# ePeak: Standardize and reproducible ChIP-seq analysis from raw        #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
3
4
5
6
#           data to differential analysis                               #
# Authors: Rachel Legendre, Maelle Daunesse                             #
# Copyright (c) 2019-2020  Institut Pasteur (Paris) and CNRS.           #
#                                                                       #
7
# This file is part of ePeak workflow.                                  #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
8
#                                                                       #
9
# ePeak is free software: you can redistribute it and/or modify         #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
10
11
12
13
# it under the terms of the GNU General Public License as published by  #
# the Free Software Foundation, either version 3 of the License, or     #
# (at your option) any later version.                                   #
#                                                                       #
14
# ePeak is distributed in the hope that it will be useful,              #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
15
16
17
18
19
# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
# GNU General Public License for more details.                          #
#                                                                       #
# You should have received a copy of the GNU General Public License     #
20
# along with ePeak (LICENSE).                                           #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
21
22
23
24
25
# If not, see <https://www.gnu.org/licenses/>.                          #
#########################################################################


#===============================================================================
26
# Optimized MultiQc config file dedicated to ePeak workflow
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
27
28
29
30
31
32
33
34
#===============================================================================

#------------------------------------------------------------------------------
#        TO CHANGE
#-------------------------------------------------------------------------------

# Title to use for the report.
title: "ChIP-seq analysis"
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
35
subtitle: "test of epigenomics"                                                     # Set your own text
36
intro_text: "MultiQC reports summarise analysis results produced from ePeak"     
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
37
38
39

# Add generic information to the top of reports
report_header_info:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
40
41
42
43
44
    - Contact E-mail: '<login>@example.com'                                         # Set your own text
    - Application Type: 'ChIP-seq'                                                  # Set your own text
    - Project Type: 'Differential peak expression'                                  # Set your own text
    - Sequencing Platform: 'HiSeq 2500 High Output V4'                              # Set your own text
    - Sequencing Setup: 'PE75'                                                      # Set your own text
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58

#-------------------------------------------------------------------------------


# Specify a custom logo to add to reports (uncomment to use)
#custom_logo: ''        # '/path/to/logo.png'
#custom_logo_url: ''    # 'https://www.example.com'
#custom_logo_title: ''  # 'Our Institute Name'


#-------------------------------------------------------------------------------
#        PLEASE DO NOT CHANGE THE FOLLOWING CONFIG
#-------------------------------------------------------------------------------

Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Modules to place at the top of the report, in the order listed here.
top_modules:
    - fastqc
    - cutadapt
    - bowtie2
    - picard
    - deeptools
    - phantompeakqualtools
    - peaks_metrics
    - idr_metrics
    - spikes_metrics
    - feature_counts



# Sample-name cleaning. Sample names are usually derived from the input
# filename; when one of the plain strings below is found, it is discarded
# together with everything to its right.
# Example: file1.fq.gz_trimmed.bam_deduplicated_fastqc.zip becomes 'file1'.
# fn_clean_exts would replace the MultiQC defaults, whereas
# extra_fn_clean_exts (used here) appends to them.
extra_fn_clean_exts:
    - '.gz'
    - '.fastq'
    - '_trim'
    - '_mapping.e'
    - '_sort'
    - '_sort_dedup_biasedRegions'
    - '_sort_dedup'
    - '_sort_biasedRegions'
    - '_R1'
    - '_R2'
    - pattern: '.sorted'
      type: remove
    - pattern: '^Sample_\d+'
      type: regex

# Files, directories and paths that are skipped when searching for logs.
fn_ignore_files:
    - '.DS_Store'

fn_ignore_dirs:
    - '.snakemake'
    - 'cluster_logs'
    - 'logs'

fn_ignore_paths:
    - '03-Deduplication/*spikes*'
    - '02-Mapping/*_spike*'
    - 'slurm*'


# Filename search patterns, overriding the defaults listed in
# multiqc/utils/search_patterns.yaml. A default can be removed by
# setting its entry to null.
sp:
    cutadapt: {fn: '*trim.txt'}
    phantompeakqualtools/out: {fn: '*_spp.out'}
    picard/markdups: {fn: '*_dedup.txt'}
    picard/insertsize: {fn: '*_fragmentSizeDistribution.txt'}
    deeptools/plotFingerprintOutRawCounts: {fn: '*_fingerprint_rawcounts.txt'}
    idr_metrics: {fn: 'IDR_metrics.out'}
    macs2_peaks_metrics: {fn: 'macs2*_Peaks_metrics.out'}
    seacr_peaks_metrics: {fn: 'seacr*_Peaks_metrics.out'}
    spikes_metrics: {fn: 'Spikes_metrics.out'}
    frip_scores: {fn: 'frip_metrics_mqc.out'}



Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
137
custom_data:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
    # Custom-content table of spike-in statistics, shown under the shared
    # "Peaks metrics" parent section. Columns: sample name, number of mapped
    # reads and percentage.
    spikes_metrics:
        id: 'spikes_metrics'
        section_name: 'Spikes metrics'
        # Describes the spike-in table itself (the previous text was the IDR
        # description, copied from the IDR section).
        section_description: 'Number and percentage of reads mapped to the spike-in genome for each sample'
        parent_id: "peak_section"
        parent_name: "Peaks metrics"
        parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
        plot_type: 'table'
        pconfig:
            id: 'spikes_metrics'
            namespace: 'spikes_metrics'
        headers:
            Spike:
                title: 'Sample Name'
                description: 'Sample Name'
            MappedReads:
                title: 'Mapped Reads'
                description: 'Number of mapped reads'
            Percent:
                title: 'Percent'
                description: 'Percent'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
159
    idr_metrics:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
160
        id: "idr_metrics"
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
161
162
        section_name: 'IDR metrics'
        # Short explanation displayed under the section title, with a link to
        # the ENCODE definition of the concordance terms.
        section_description: 'Statistics about Irreproducible Discovery Rate (IDR) (see https://www.encodeproject.org/data-standards/terms/#concordance for more information)'
        plot_type: 'table'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
163
164
165
        parent_id: "peak_section"
        parent_name: "Peaks metrics"
        parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
166
        pconfig:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
167
168
            id: 'idr_metrics'
            namespace: 'idr_metrics'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
        # Column definitions of the IDR table: one entry per column of the
        # metrics file, with the title and tooltip shown in the report.
        headers:
            cond:
                title: 'Condition'
                description: 'Condition'
            Replicates:
                title: '# peaks'
                description: 'Total number of peaks passed IDR in replicates'
            PPR:
                title: 'PPR'
                description: 'Total number of peaks passed IDR in pseudo-replicates'
            SPR1:
                title: 'SPR1'
                description: 'Total number of peaks passed IDR in self pseudo-replicates 1'
            SPR2:
                title: 'SPR2'
                description: 'Total number of peaks passed IDR in self pseudo-replicates 2'
            # RR and SCR are ratios compared against the 1.8 / 2.0 thresholds of
            # the conditional formatting rules, so they keep two decimals;
            # rounding to integers would display 1.8 and 2.0 identically.
            RR:
                title: 'RR'
                description: 'The rescue ratio measures consistency between datasets when the replicates within a single experiment are not comparable.'
                format: '{:,.2f}'
            SCR:
                title: 'SCR'
                description: 'The self-consistency ratio measures consistency within a single dataset'
                format: '{:,.2f}'
            # Score only takes the values 1, 0 and -1, so no decimals are shown.
            Score:
                title: 'Score'
                description: 'If RR and SCR are ideal, score is equal to 1. If score is -1, results are concerning.'
                format: '{:,.0f}'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
197
198
199
200
201
202
    # Custom-content section reporting the number of peaks called with MACS2;
    # grouped under the shared "Peaks metrics" parent section.
    macs2_peaks_metrics:
        id: 'macs2_peaks_metrics'
        section_name: 'Number of peaks with MACS2'
        parent_id: "peak_section"
        parent_name: "Peaks metrics"
        parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
203
204
        plot_type: 'table'
        pconfig:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
205
206
            id: 'macs2_peaks_metrics'
            namespace: 'macs2_peaks_metrics'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
207
208
209
210
211
212
213
        headers:
            Sample:
                title: 'Sample name'
                description: 'Sample Name'
            Peaks:
                title: 'Number of peaks'
                description: 'Number of peaks'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
214
215
216
217
218
219
    # Custom-content section reporting the number of peaks called with SEACR;
    # grouped under the shared "Peaks metrics" parent section.
    seacr_peaks_metrics:
        id: 'seacr_peaks_metrics'
        section_name: 'Number of peaks with SEACR'
        parent_id: "peak_section"
        parent_name: "Peaks metrics"
        parent_description: "This section contains metrics and statistics about peak calling, IDR and spike-in"
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
220
221
        plot_type: 'table'
        pconfig:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
222
223
            id: 'seacr_peaks_metrics'
            namespace: 'seacr_peaks_metrics'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
224
        headers:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
225
226
            Sample:
                title: 'Sample name'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
227
                description: 'Sample Name'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
228
229
230
231
232
233
234
235
236
237
            Peaks:
                title: 'Number of peaks'
                description: 'Number of peaks'


# Placement weights for the ratio and score columns of the IDR table.
table_columns_placement:
    idr_metrics: {RR: 1300, SCR: 1400, Score: 1500}
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

# Conditional formatting of table cells: each column lists the comparisons
# that mark a value as pass, warn or fail.
table_cond_formatting_rules:
    RR:
        pass: [{lt: 2.0}]
        warn: [{gt: 1.8}]
        fail: [{gt: 2.0}]
    SCR:
        pass: [{lt: 2.0}]
        fail: [{gt: 2.0}]
    Score:
        pass: [{eq: 1.0}]
        warn: [{eq: 0.0}]
        fail: [{eq: -1.0}]
    mqc-generalstats-phantompeakqualtools-NSC:
        pass: [{gt: 1.05}]
        warn: [{lt: 1.05}]
        fail: [{lt: 0.9}]
    mqc-generalstats-phantompeakqualtools-RSC:
        pass: [{gt: 0.8}]
        warn: [{lt: 0.8}]
        fail: [{lt: 0.5}]




# Prefix sample names with their directory. Useful when analysing the
# same samples with different parameters.
prepend_dirs: false

# Names of the generated report file and of the parsed-data directory
output_fn_name: 'multiqc_report.html'
data_dir_name: 'multiqc_data'

# Create the parsed data directory in addition to the report
make_data_dir: true



# Files larger than this size (in bytes) are ignored when searching for logs
log_filesize_limit: 5000000

# A couple of debug messages are skipped while searching files, because the
# log can get very verbose otherwise. Set these to true to help debugging.
report_readerrors: false
report_imgskips: false

# Set to true to opt out of remotely checking for the latest version
no_version_check: false

# Plot rendering. Templates may override these settings. The default
# template offers interactive plots (Javascript using HighCharts) and flat
# plots (images, using MatPlotLib). For interactive plots, automatic
# rendering can be disabled when there are many samples, so that the
# browser does not lock up when the report opens.
# Try to use only flat image graphs
plots_force_flat: false
# Try to use only interactive javascript graphs
plots_force_interactive: false
# If neither of the above, use flat if > this number of datasets
plots_flat_numseries: 100
# If interactive, don't plot on load if > this number of datasets
num_datasets_plot_limit: 50
# Swap tables for a beeswarm plot above this
max_table_rows: 500


# Overwrite the defaults of which table columns are visible by default
table_columns_visible:
    FastQC:
        percent_fails: False
        total_sequences: True