config.yaml 12 KB
Newer Older
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
1
#########################################################################
2
# ePeak: Standardized and reproducible ChIP-seq analysis from raw       #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
3
4
5
6
#           data to differential analysis                               #
# Authors: Rachel Legendre, Maelle Daunesse                             #
# Copyright (c) 2019-2020  Institut Pasteur (Paris) and CNRS.           #
#                                                                       #
7
# This file is part of ePeak workflow.                                  #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
8
#                                                                       #
9
# ePeak is free software: you can redistribute it and/or modify         #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
10
11
12
13
# it under the terms of the GNU General Public License as published by  #
# the Free Software Foundation, either version 3 of the License, or     #
# (at your option) any later version.                                   #
#                                                                       #
14
# ePeak is distributed in the hope that it will be useful,              #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
15
16
17
18
19
# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
# GNU General Public License for more details.                          #
#                                                                       #
# You should have received a copy of the GNU General Public License     #
20
# along with ePeak (LICENSE).                                           #
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
21
22
23
24
25
# If not, see <https://www.gnu.org/licenses/>.                          #
#########################################################################



26

Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
27
# ========================================================
28
# ePeak pipeline config file
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
29
30
#=========================================================

Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
31
32
33
# path to the fastq directory
input_dir: /path/to/raw_data
# mate pair tag in the fastq filenames
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
34
input_mate: '_R[12]'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
35
# filename extension
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
36
input_extension: '.fastq.gz'
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
37
38
39
40
# path to the analysis directory
analysis_dir: /path/to/directory/analysis
# tmpdir: path to temporary directory (default /tmp/, but could be "/local/scratch/")
tmpdir: $TMPDIR
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
41
42
43
44
45
46
47

#===============================================================================
# Design information. This information will be used during the
# peak calling step
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
48
49
50
51
# - design_file: Path to the design file in tabulated format. See README
# - marks: list of marks/TF (comma-separated)
# - condition: list of conditions (comma-separated). NB: The first condition is the reference condition
# - replicates: replicate tags in the fastq filenames (i.e. REP or Rep or rep)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
52
53
54
55
56
# - spike: set to 'yes' if you have spikes in your data. Possible values: {yes, no}
# - spike_genome_file: path to genome file used for spike-in
#===============================================================================

design:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
57
58
59
    design_file: /path/to/directory/analysis/config/design.txt    
    marks: K4Me3
    condition: C, U
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
60
    replicates: Rep
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
61
62
    spike: yes
    spike_genome_file: /path/to/genome/directory/dmel9.fa
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
63
64

#===============================================================================
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
65
66
# Indexing section: if index is set to 'yes', indexes for bowtie2 will be produced 
# in genome_directory. If index is set to 'no', all parameters need to be filled.
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
67
68
69
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
70
71
72
73
# - index: 'yes' if the index needs to be built; if 'no', this rule is ignored.
# - genome_directory: directory where all indexes are written
# - name: prefix used in all output files from mapping (see bowtie2 manual)
# - fasta_file: path to reference genome in fasta format
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
74
75
76
#===============================================================================

genome:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
77
78
    index: yes
    genome_directory: /path/to/genome/directory/
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
79
    name: mm10
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
80
    fasta_file: /path/to/genome/directory/mm10.fa
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
81
82
83
84
85
86
87

#===============================================================================
# FastQC section
#
# :Parameters:
#
# - options: Any valid FastQC parameter
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
88
# - threads: number of threads 
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
89
90
91
92
93
94
95
96
97
98
99
#===============================================================================

fastqc:
    # extra FastQC command-line parameters (empty string: none)
    options: ''
    # number of threads
    threads: 4   

#===============================================================================
# Quality trimming and adapter removal with cutadapt
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
100
101
# - remove: if 'no', this rule is ignored.
# - adapter_list: string or fasta file (see cutadapt documentation)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
102
# - m: int (trimmed reads are discarded if they are shorter than this length)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
103
104
105
# - mode: g for 5', a for 3', b for both 5'/3' (see cutadapt documentation)
# - options: any parameter of cutadapt (see cutadapt documentation)
# - quality: minimum Phred-score quality
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# - threads: number of threads 
#
#===============================================================================


adapters:
    # set to 'no' to skip this rule
    remove: yes
    # adapter sequence given as a string, or a fasta file (see cutadapt documentation)
    adapter_list: file:config/adapt.fa
    # trimmed reads shorter than this length are discarded
    m: 25
    # g for 5', a for 3', b for both 5'/3'
    mode: a
    # any additional cutadapt parameters
    options: -O 6 --trim-n --max-n 1 
    # minimum Phred-score quality
    quality: 30
    # number of threads
    threads: 4



#===============================================================================
# bowtie2_mapping used to align reads against genome file
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
127
# - options: any parameter recognized by bowtie2 (see bowtie2 manual)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
128
129
130
131
132
# - threads: number of threads to be used
#===============================================================================


bowtie2_mapping:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
133
    options: "--very-sensitive "
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
134
135
136
    threads: 4

#===============================================================================
137
# mark duplicates (picard-tools) marks PCR duplicates in BAM files
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
138
139
140
#
# :Parameters:
#
141
# - do: if 'no', this rule is ignored.
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
142
# - dedup_IP: If false, only INPUT files will be deduplicated (ie duplicated reads 
143
#             are removed from output), IP files are only marked (ie duplicated
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
144
#             reads are written with appropriate flags set). If true, all files
145
146
#             will be deduplicated. Default value: true. 
# - threads: number of threads to be used
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
147
148
149
#===============================================================================

mark_duplicates:
150
151
    do: yes
    dedup_IP: 'True' 
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
152
153
154
    threads: 4

#===============================================================================
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
155
# remove biased genomic regions (previously named blacklisted regions)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
156
157
158
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
159
# - do: if 'no', this rule is ignored.
160
# - bed_file: path to BED file containing all biased regions
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
161
# - threads: number of threads
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
162
163
164
#===============================================================================


165
remove_biasedRegions:
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
166
    do: yes
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
167
    bed_file: /path/to/genome/directory/mm10_biasedRegions.bed
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
168
169
170
171
172
173
174
175
    threads: 1

#===============================================================================
# peak calling with macs2.
#
# :Parameters:
#
# - mode_choice: peak calling mode used by MACS2. Could be 'narrow' or 'broad'.
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
176
177
178
179
180
181
# - no_model: use the '--nomodel' option. See MACS2 documentation. Possible values: {yes, no}
# - options: any parameter recognized by MACS2. For paired-end reads, it is highly 
#   recommended to use '-f BAMPE' option. For broad peaks, use '--broad-cutoff 0.01' (See MACS2 manual)
# - cutoff: The q-value (minimum FDR) cutoff to call significant regions. For broad peak 
#   calling set to 0.01
# - genomeSize: It's the mappable genome size or effective genome size. (See MACS2 manual)
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
182
183
184
185
#
#===============================================================================


186
187
macs2:
    do: yes 
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
188
189
    mode_choice: 'narrow'    
    no_model: no             
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
190
191
192
193
    options: "--keep-dup all "
    cutoff: 0.1
    genomeSize: mm

194
195
196
197
198
199
200
201
202

#===============================================================================
# Peak calling with SEACR https://github.com/FredHutch/SEACR
# (recommended  for cut&run data)
#
# :Parameters:
#
# - do: if 'no', this rule is ignored.
# - threshold: could be 'stringent' or 'relaxed'
203
# - norm: specify "norm" for normalized or "non" for non-normalized data processing.
204
205
#         if design:spike is set to yes, a scaling factor will be applied to data, 
#         specify "non" according to SEACR manual
206
207
208
209
210
211
212
213
214
215
#===============================================================================


seacr:
    do: no
    threshold: 'stringent'
    # NOTE(review): design.spike is set to 'yes' in this file; the section notes say to
    # specify 'non' when spike-in scaling is applied, so revisit this value before
    # switching 'do' to yes.
    norm: 'norm'



Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
216
217
218
219
220
#===============================================================================
# Compute IDR on replicates, pseudo-replicates and pooled replicates
#
# :Parameters:
#
221
# - do: if 'no', this rule is ignored.
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
222
223
# - rank: which column to use to rank peaks. Options: signal.value, p.value, q.value, columnIndex
# - thresh: report statistics for peaks with a global idr below this value. Default: 0.05
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
224
225
226
227
#
#===============================================================================

compute_idr:
228
    do: yes
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
229
230
    rank: 'signal.value'
    thresh: 0.05
231
232
233
234
235
236
237
238
239
240
241
242
243
244


#===============================================================================
# Compute intersection approach on replicates
#
# :Parameters:
#
# - do: if set to 'yes', will compute the intersection approach and use it
#   to select reproducible peaks. (for narrow only, corresponds to the default broad approach)
# - ia_overlap: percentage of overlap between the peaks to be selected (-f parameter of bedtools intersect). Default: 0.8
#
#===============================================================================

intersectionApproach:
245
    do: no
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
246
247
248
249
250
251
252
253
254
    ia_overlap: 0.8



#===============================================================================
# Compute differential analysis
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
255
256
257
258
259
260
# - method: 'Limma' or 'DEseq2'
# - spikes: If you have spikes, set to 'yes' will use spike normalization
# - normalisation: 'geometrical', 'spikes', 'scale', 'quantile', 'cyclicloess'
# - pAdjustMethod: Limma and DEseq2 options. For Limma, 'none', 'BH', 'BY' and 'holm' are possible.
#                  For DESeq2, 'holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr' and 'none' are accepted.
# - alpha: threshold for differential analysis. Default: 0.05
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
261
# - batch: NULL or a vector with batch effects as c("","") (NB: an unquoted NULL is read as a YAML null, not as the string "NULL")
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
262
# - input_counting: add all INPUT in count matrix
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
263
264
265
#===============================================================================

differential_analysis:
266
    do: no
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
267
268
    method: "Limma" 
    spikes: no
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
269
    normalisation: "scale" 
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
270
271
272
    pAdjustMethod: "BH"
    alpha: 0.05
    batch: NULL
273
    input_counting: no
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
274
275


276

Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
277
278
279
280
281
282
#############################################################################
# bamCoverage from Deeptools
#  see https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html
#
# :Parameters:
#
283
284
285
286
287
# - do: if 'no', this rule is ignored
# - options: options related to deeptools
# - spike-in: set to yes to use spike-in data as scaling factor
# see https://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html
# for more information about effective Genome Size
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
288
289
290
291

bamCoverage:
    do: yes
    # NOTE(review): 2913022398 looks like the deepTools effective genome size for human
    # GRCh38, while the genome section of this file is set to mm10 — confirm and set the
    # value matching the genome actually used.
    options: "--binSize 10 --effectiveGenomeSize 2913022398 --normalizeUsing RPGC" 
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
292
    spike-in: no
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
293
294
    threads: 4

295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#############################################################################
# GeneBody heatmap plot from Deeptools
# see https://deeptools.readthedocs.io/en/develop/content/tools/plotHeatmap.html#usage-examples
#
# :Parameters:
#
# - do: if 'no', this rule is ignored
# - regionsFileName: File name or names, in BED or GTF format, containing the regions to plot.
#
#===============================================================================

geneBody:
    do: yes
    # BED or GTF file(s) containing the regions to plot
    regionsFileName: /path/to/genome/directory/mm10_genemodel.bed
    threads: 4

#==============================================================================
# IGV_session produces an XML session readable by the IGV browser with coverage and
# peak files
#
# :Parameters:
#
# - do: if 'no', this rule is ignored
# - autoScale:
# - normalize:
#==============================================================================
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
321

322
323
324
325
igv_session:
    do: yes
    # NOTE(review): autoScale and normalize have no description in the section header;
    # presumably display settings written into the IGV session — confirm against the rule.
    autoScale: True
    normalize: False
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
326
327
328
329
330
331
332

#===============================================================================
#   MultiQC aggregates results from bioinformatics analyses across many
#   samples into a single report.
#
# :Parameters:
#
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
333
# - options: any options recognised by MultiQC
Rachel  LEGENDRE's avatar
Rachel LEGENDRE committed
334
335
336
337
338
339
340
341
# - output-directory: Create report in the specified output directory
#===============================================================================


multiqc:
    # -f overwrites an existing report, -e macs2 excludes the macs2 module,
    # -x ignores files matching the given glob (here the spike-in outputs)
    options: " -f -e macs2 -x 03-Deduplication/*spikes* -x 02-Mapping/*_spike*"
    # directory in which the report is created
    output-directory: "11-Multiqc"