Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gael MILLOT
14985_loot
Commits
a12b9b1b
Commit
a12b9b1b
authored
Jan 28, 2022
by
Gael MILLOT
Browse files
release v7.3.0: flow and files improved by Fred, because of the relative paths, no need of dev/
parent
fb383cb6
Changes
10
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
a12b9b1b
...
...
@@ -198,6 +198,11 @@ Gitlab developers
## WHAT'S NEW IN
### v7.3.0
1) flow and files improved by Fred, because of the relative paths, no need of dev/
### v7.2.0
1) better priority for slurm
...
...
bin/Nremove.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
bin/coverage.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
bin/cutoff.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
bin/duplicate_removal.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
bin/fivep_filtering.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
bin/trim.sh
View file @
a12b9b1b
#!usr/bin/env bash
#!
/
usr/bin/env bash
#########################################################################
## ##
...
...
dev/test_local.config
deleted
100644 → 0
View file @
fb383cb6
/*
#########################################################################
## ##
## nextflow.config ##
## ##
## Gael A. Millot ##
## Bioinformatics and Biostatistics Hub ##
## Computational Biology Department ##
## Institut Pasteur Paris ##
## ##
#########################################################################
*/
////////
variables
that
will
be
used
only
in
the
main
.
nf
//
variables
exported
to
the
main
.
nf
environment
.
See
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
config
.
html
#scope-env
env
{
////
path
and
files
git_path
=
"https://gitlab.pasteur.fr/gmillot/14985_loot/"
in_path
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset"
//
in_path
=
"/mnt/share/14985_loot/dataset/B2699/00_Rawdata"
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/3"
//
where
initial
fastq
file
is
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/"
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata"
//
where
initial
fastq
file
is
fastq_file
=
"test.fastq2.gz"
//
fastq
file
name
//
fastq_file
=
"4-4_S1_L001_R1_001.fastq.gz"
//
fastq_file
=
"3-4_S1_L001_R1_001.fastq.gz"
primer_fasta
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//
primer_fasta
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//
list
of
primers
used
for
the
library
and
used
by
Alien
trimmer
to
trim
the
raw
reads
//
primer_fasta
=
"/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
////
end
path
and
files
////
alientrimmer
alientrimmer_l_param
=
30
//
L
parameter
of
alienTrimmer
////
end
alientrimmer
////
fivep_filtering
attc_seq
=
"CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG"
//
sequence
of
attc
,
in
red
and
purple
in
section
4
20200505
of
the
CL
labbook
(
48
bases
on
the
left
of
the
cutting
site
).
Required
for
plotting
.
Warning
:
never
change
this
sequence
fivep_seq_filtering
=
'^CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG.+$'
//
regex
indicating
the
5
'
sequence
of
reads
to
select
,
then
to
trim
from
the
selected
reads
.
See
the
section
8
.
6
to
8
.
13
of
the
labbook
20200520
,
but
instead
of
analysing
and
trimming
in
two
steps
(
29
Nuc
of
AttC
part
of
the
primer
then
19
Nuc
between
primer
and
Attc
cutting
site
),
perform
all
in
a
single
step
,
and
play
with
the
regex
,
like
Test
also
// ^
CAATTCATTCAAGCCGACGCCGCTTCGCG
[
GN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
TN
][
TN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
GN
].+$
// [
CN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
CN
][
GN
][
AN
][
CN
][
GN
][
CN
][
CN
][
GN
][
CN
][
TN
][
TN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
TN
][
TN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
GN
].+$
// ^
CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG
.+$
// ^[
CN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
CN
][
GN
][
AN
][
CN
][
GN
][
CN
][
CN
][
GN
][
CN
][
TN
][
TN
][
CN
][
GN
][
CN
][
GN
]
GCGCGGCTTAATTCAAGCG
.+$
fivep_seq_nb
=
48
//
must
be
the
exact
number
of
nuc
positions
indicated
in
fivep_seq_filtering
////
end
fivep_filtering
cutoff_nb
=
25
//
reads
of
length
cutoff_nb
after
trimming
are
removed
ref_path
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
ref_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
path
of
the
reference
genome
ref_file
=
"Ecoli-K12-MG1655_ORI_CENTERED.fasta"
//
fasta
file
of
the
reference
genome
ori_coord
=
"2320711 2320942"
// [
2320711
,
2320942
] //
Ecoli
centered
coordinates
ter_coord
=
"4627368 4627400"
//[
4627368
,
4627400
] //
Ecoli
centered
coordinates
color_coverage
=
"5"
//
three
integers
for
the
color
of
the
three
coverage
plots
[
1
,
2
,
5
]
xlab
=
"Ecoli Genome (bp)"
//
name
of
the
reference
genome
for
graphics
genome_size
=
"4641652"
//
in
bp
cute_path
=
"https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R"
//
single
character
string
indicating
the
file
(
and
absolute
pathway
)
of
the
required
cute_little_R_functions
toolbox
.
With
internet
connection
available
,
this
can
also
be
used
:
"https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R"
or
local
"C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
}
////////
end
variables
that
will
be
used
only
in
the
main
.
nf
////////
variables
that
will
be
used
below
(
and
potentially
in
the
main
.
nf
file
)
////
must
be
also
exported
system_exec
=
'local'
//
the
system
that
runs
the
workflow
.
Either
'local'
or
'slurm'
out_path
=
"/mnt/c/Users/Gael/Desktop"
//
where
the
report
file
will
be
saved
.
Example
report_path
=
'.'
for
where
the
main
.
nf
run
is
executed
or
report_path
=
'/mnt/c/Users/Gael/Desktop'
//
out_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results"
//
where
the
report
file
will
be
saved
.
Example
report_path
=
'.'
for
where
the
main
.
nf
run
is
executed
or
report_path
=
'/mnt/c/Users/Gael/Desktop'
////
end
must
be
also
exported
////
general
variables
result_folder_name
=
"20220120_res_CL14985_test"
////
end
general
variables
////
slurm
variables
fastqueue
=
'common,dedicated'
//
fast
for
-
p
option
of
slurm
.
Example
:
fastqueue
=
'common,dedicated'
.
Example
:
fastqueue
=
'hubbioit'
fastqos
=
'--qos=fast'
//
fast
for
--
qos
option
of
slurm
.
Example
:
fastqos
=
'--qos=fast'
normalqueue
=
'common,dedicated'
//
normal
for
-
p
option
of
slurm
.
Example
:
normalqueue
=
'bioevo'
normalqos
=
'--qos=hubbioit'
//
normal
for
--
qos
option
of
slurm
.
Example
:
normalqos
=
'--qos=dedicated'
longqueue
=
'common,dedicated'
//
slow
for
-
p
option
of
slurm
.
Example
:
longqueue
=
'bioevo'
longqos
=
'--qos=hubbioit'
//
slow
for
--
qos
option
of
slurm
.
Example
:
longqos
=
'--qos=dedicated'
add_options
=
' '
//
additional
option
of
slurm
.
Example
:
add_options
=
'--exclude=maestro-1101,maestro-1034'
or
add_options
=
' '
////
end
slurm
variables
////////
end
variables
that
will
be
used
below
////////
Pre
processing
int
secs
= (
new
Date
().
getTime
())/
1000
out_path
=
"${out_path}/${result_folder_name}_${secs}"
////////
end
Pre
processing
////////
variables
used
here
and
also
in
the
main
.
nf
file
env
{
system_exec
=
"${system_exec}"
out_path
=
"${out_path}"
}
////////
variables
used
here
and
also
in
the
main
.
nf
file
////////
Scopes
//
kind
of
execution
.
Either
'local'
or
'slurm'
//
those
are
closures
.
See
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
script
.
html
#closures
executor
{
name
=
"${system_exec}"
queueSize
=
2000
}
//
create
a
report
folder
and
print
a
html
report
file
.
If
no
absolute
path
,
will
be
where
the
run
is
executed
//
see
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
config
.
html
#config-report
report
{
enabled
=
true
file
=
"${out_path}/reports/nf_report.html"
//
warning
:
here
double
quotes
to
get
the
nextflow
variable
interpretation
}
//
txt
file
with
all
the
processes
and
info
trace
{
enabled
=
true
file
=
"${out_path}/reports/nf_trace.txt"
}
//
html
file
with
all
the
processes
timeline
{
enabled
=
true
file
=
"${out_path}/reports/nf_timeline.html"
}
// .
dot
picture
of
the
workflow
.
Only
one
file
allowed
dag
{
enabled
=
true
file
=
"${out_path}/reports/nf_dag.png"
//
Warning
:
require
graphviz
installed
in
the
system
,
see
protocol
136
}
//
define
singularity
parameters
singularity
{
enabled
=
true
autoMounts
=
true
//
automatically
mounts
host
paths
in
the
executed
container
//
runOptions
=
'--home $HOME:/home/$USER --bind /pasteur'
//
provide
any
extra
command
line
options
supported
by
the
singularity
exec
.
Here
,
binds
all
of
/
pasteur
into
/
pasteur
of
the
container
.
Otherwise
no
access
cacheDir
=
'singularity'
//
name
of
the
directory
where
remote
Singularity
images
are
stored
.
When
rerun
,
the
exec
directly
uses
these
without
redownloading
them
.
When
using
a
computing
cluster
it
must
be
a
shared
folder
accessible
to
all
computing
nodes
}
////////
end
Scopes
////////
directives
//
provide
the
default
directives
for
all
the
processes
in
the
main
.
nf
pipeline
calling
this
config
file
process
{
//
directives
for
all
the
processes
//
executor
=
'local'
//
no
need
because
already
defined
above
in
the
executor
scope
if
(
system_exec
==
'slurm'
){
queue
=
"$fastqueue"
clusterOptions
=
"$fastqos $add_options"
scratch
=
false
maxRetries
=
1
errorStrategy
=
'retry'
}
else
{
maxRetries
=
0
errorStrategy
=
'terminate'
}
//
all
the
processes
of
the
main
.
nf
file
with
the
label
'bedtools'
will
use
these
directives
by
default
withLabel
:
bash
{
container
=
'gmillot/bash-extended_v4.0:gitlab_v8.0'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'3G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
alien_trimmer
{
container
=
'gmillot/alien_trimmer_v0.4.0:gitlab_v8.1'
//
no
most
recent
at
20210930
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'3G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
fastqc
{
container
=
'evolbioinfo/fastqc:v0.11.8'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
r_ext
{
container
=
'gmillot/r_v4.0.5_extended_v2.0:gitlab_v6.4'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'64G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
bowtie2
{
container
=
'gmillot/bowtie2_v2.3.4.3_extended_v2.0:gitlab_v8.0'
cpus
=
12
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'64G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
samtools
{
container
=
'gmillot/samtools_v1.14:gitlab_v8.0'
cpus
=
1
memory
=
'1G'
}
withLabel
:
bedtools
{
container
=
'gmillot/bedtools_v2.30.0:gitlab_v8.0'
cpus
=
12
memory
=
'64G'
}
//
all
the
processes
of
the
main
.
nf
file
with
the
label
'bedtools'
will
use
these
directives
by
default
withLabel
:
gatk
{
//
scratch
=
true
container
=
'broadinstitute/gatk:4.1.9.0'
memory
=
'60G'
if
(
system_exec
==
'slurm'
){
queue
= {
task
.
attempt
>
1
?
"$normalqueue"
:
"$fastqueue"
}
clusterOptions
= {
task
.
attempt
>
1
?
"$normalqos $add_options"
:
"$fastqos $add_options"
}
}
}
withLabel
:
bwa
{
container
=
"evolbioinfo/bwa:v0.7.17"
cpus
=
20
memory
=
'30G'
}
withLabel
:
bcftools
{
container
=
"evolbioinfo/bcftools:f27f849"
cpus
=
1
memory
=
'10G'
}
withLabel
:
multiqc
{
container
=
'ewels/multiqc:1.10.1'
errorStrategy
=
'ignore'
cpus
=
1
}
}
////////
end
directives
\ No newline at end of file
dev/test_slurm.config
deleted
100644 → 0
View file @
fb383cb6
/*
#########################################################################
## ##
## nextflow.config ##
## ##
## Gael A. Millot ##
## Bioinformatics and Biostatistics Hub ##
## Computational Biology Department ##
## Institut Pasteur Paris ##
## ##
#########################################################################
*/
////////
variables
that
will
be
used
only
in
the
main
.
nf
//
variables
exported
to
the
main
.
nf
environment
.
See
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
config
.
html
#scope-env
env
{
////
path
and
files
git_path
=
"https://gitlab.pasteur.fr/gmillot/14985_loot/"
//
in_path
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset"
//
in_path
=
"/mnt/share/14985_loot/dataset/B2699/00_Rawdata"
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/3"
//
where
initial
fastq
file
is
in_path
=
"$baseDir/dataset/"
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata"
//
where
initial
fastq
file
is
fastq_file
=
"test.fastq2.gz"
//
fastq
file
name
//
fastq_file
=
"4-4_S1_L001_R1_001.fastq.gz"
//
fastq_file
=
"3-4_S1_L001_R1_001.fastq.gz"
//
primer_fasta
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
primer_fasta
=
"$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//
list
of
primers
used
for
the
library
and
used
by
Alien
trimmer
to
trim
the
raw
reads
//
primer_fasta
=
"/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
////
end
path
and
files
////
alientrimmer
alientrimmer_l_param
=
30
//
L
parameter
of
alienTrimmer
////
end
alientrimmer
////
fivep_filtering
attc_seq
=
"CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG"
//
sequence
of
attc
,
in
red
and
purple
in
section
4
20200505
of
the
CL
labbook
(
48
bases
on
the
left
of
the
cutting
site
).
Required
for
plotting
.
Warning
:
never
change
this
sequence
fivep_seq_filtering
=
'^CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG.+$'
//
regex
indicating
the
5
'
sequence
of
reads
to
select
,
then
to
trim
from
the
selected
reads
.
See
the
section
8
.
6
to
8
.
13
of
the
labbook
20200520
,
but
instead
of
analysing
and
trimming
in
two
steps
(
29
Nuc
of
AttC
part
of
the
primer
then
19
Nuc
between
primer
and
Attc
cutting
site
),
perform
all
in
a
single
step
,
and
play
with
the
regex
,
like
Test
also
// ^
CAATTCATTCAAGCCGACGCCGCTTCGCG
[
GN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
TN
][
TN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
GN
].+$
// [
CN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
CN
][
GN
][
AN
][
CN
][
GN
][
CN
][
CN
][
GN
][
CN
][
TN
][
TN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
GN
][
CN
][
GN
][
GN
][
CN
][
TN
][
TN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
GN
].+$
// ^
CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG
.+$
// ^[
CN
][
AN
][
AN
][
TN
][
TN
][
CN
][
AN
][
TN
][
TN
][
CN
][
AN
][
AN
][
GN
][
CN
][
CN
][
GN
][
AN
][
CN
][
GN
][
CN
][
CN
][
GN
][
CN
][
TN
][
TN
][
CN
][
GN
][
CN
][
GN
]
GCGCGGCTTAATTCAAGCG
.+$
fivep_seq_nb
=
48
//
must
be
the
exact
number
of
nuc
positions
indicated
in
fivep_seq_filtering
////
end
fivep_filtering
cutoff_nb
=
25
//
reads
of
length
cutoff_nb
after
trimming
are
removed
//
ref_path
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
ref_path
=
"$baseDir/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
path
of
the
reference
genome
ref_file
=
"Ecoli-K12-MG1655_ORI_CENTERED.fasta"
//
fasta
file
of
the
reference
genome
ori_coord
=
"2320711 2320942"
// [
2320711
,
2320942
] //
Ecoli
centered
coordinates
ter_coord
=
"4627368 4627400"
//[
4627368
,
4627400
] //
Ecoli
centered
coordinates
color_coverage
=
"5"
//
three
integers
for
the
color
of
the
three
coverage
plots
[
1
,
2
,
5
]
xlab
=
"Ecoli Genome (bp)"
//
name
of
the
reference
genome
for
graphics
genome_size
=
"4641652"
//
in
bp
cute_path
=
"https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R"
//
single
character
string
indicating
the
file
(
and
absolute
pathway
)
of
the
required
cute_little_R_functions
toolbox
.
With
internet
connection
available
,
this
can
also
be
used
:
"https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R"
or
local
"C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
}
////////
end
variables
that
will
be
used
only
in
the
main
.
nf
////////
variables
that
will
be
used
below
(
and
potentially
in
the
main
.
nf
file
)
////
must
be
also
exported
system_exec
=
'slurm'
//
the
system
that
runs
the
workflow
.
Either
'local'
or
'slurm'
//
out_path
=
"/mnt/c/Users/Gael/Desktop"
//
where
the
report
file
will
be
saved
.
Example
report_path
=
'.'
for
where
the
main
.
nf
run
is
executed
or
report_path
=
'/mnt/c/Users/Gael/Desktop'
out_path
=
"$baseDir/results/"
//
where
the
report
file
will
be
saved
.
Example
report_path
=
'.'
for
where
the
main
.
nf
run
is
executed
or
report_path
=
'/mnt/c/Users/Gael/Desktop'
////
end
must
be
also
exported
////
general
variables
result_folder_name
=
"20220120_res_CL14985_test"
////
end
general
variables
////
slurm
variables
fastqueue
=
'common,dedicated'
//
fast
for
-
p
option
of
slurm
.
Example
:
fastqueue
=
'common,dedicated'
.
Example
:
fastqueue
=
'hubbioit'
fastqos
=
'--qos=fast'
//
fast
for
--
qos
option
of
slurm
.
Example
:
fastqos
=
'--qos=fast'
normalqueue
=
'common,dedicated'
//
normal
for
-
p
option
of
slurm
.
Example
:
normalqueue
=
'bioevo'
normalqos
=
'--qos=hubbioit'
//
normal
for
--
qos
option
of
slurm
.
Example
:
normalqos
=
'--qos=dedicated'
longqueue
=
'common,dedicated'
//
slow
for
-
p
option
of
slurm
.
Example
:
longqueue
=
'bioevo'
longqos
=
'--qos=hubbioit'
//
slow
for
--
qos
option
of
slurm
.
Example
:
longqos
=
'--qos=dedicated'
add_options
=
' '
//
additional
option
of
slurm
.
Example
:
add_options
=
'--exclude=maestro-1101,maestro-1034'
or
add_options
=
' '
////
end
slurm
variables
////////
end
variables
that
will
be
used
below
////////
Pre
processing
int
secs
= (
new
Date
().
getTime
())/
1000
out_path
=
"${out_path}/${result_folder_name}_${secs}"
////////
end
Pre
processing
////////
variables
used
here
and
also
in
the
main
.
nf
file
env
{
system_exec
=
"${system_exec}"
out_path
=
"${out_path}"
}
////////
variables
used
here
and
also
in
the
main
.
nf
file
////////
Scopes
//
kind
of
execution
.
Either
'local'
or
'slurm'
//
those
are
closures
.
See
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
script
.
html
#closures
executor
{
name
=
"${system_exec}"
queueSize
=
2000
}
//
create
a
report
folder
and
print
a
html
report
file
.
If
no
absolute
path
,
will
be
where
the
run
is
executed
//
see
https
://
www
.
nextflow
.
io
/
docs
/
latest
/
config
.
html
#config-report
report
{
enabled
=
true
file
=
"${out_path}/reports/nf_report.html"
//
warning
:
here
double
quotes
to
get
the
nextflow
variable
interpretation
}
//
txt
file
with
all
the
processes
and
info
trace
{
enabled
=
true
file
=
"${out_path}/reports/nf_trace.txt"
}
//
html
file
with
all
the
processes
timeline
{
enabled
=
true
file
=
"${out_path}/reports/nf_timeline.html"
}
// .
dot
picture
of
the
workflow
.
Only
one
file
allowed
dag
{
enabled
=
true
file
=
"${out_path}/reports/nf_dag.png"
//
Warning
:
require
graphviz
installed
in
the
system
,
see
protocol
136
}
//
define
singularity
parameters
singularity
{
enabled
=
true
autoMounts
=
true
//
automatically
mounts
host
paths
in
the
executed
container
//
runOptions
=
'--home $HOME:/home/$USER --bind /pasteur'
//
provide
any
extra
command
line
options
supported
by
the
singularity
exec
.
Here
,
binds
all
of
/
pasteur
into
/
pasteur
of
the
container
.
Otherwise
no
access
cacheDir
=
'singularity'
//
name
of
the
directory
where
remote
Singularity
images
are
stored
.
When
rerun
,
the
exec
directly
uses
these
without
redownloading
them
.
When
using
a
computing
cluster
it
must
be
a
shared
folder
accessible
to
all
computing
nodes
}
////////
end
Scopes
////////
directives
//
provide
the
default
directives
for
all
the
processes
in
the
main
.
nf
pipeline
calling
this
config
file
process
{
//
directives
for
all
the
processes
//
executor
=
'local'
//
no
need
because
already
defined
above
in
the
executor
scope
if
(
system_exec
==
'slurm'
){
queue
=
"$fastqueue"
clusterOptions
=
"$fastqos $add_options"
scratch
=
false
maxRetries
=
1
errorStrategy
=
'retry'
}
else
{
maxRetries
=
0
errorStrategy
=
'terminate'
}
//
all
the
processes
of
the
main
.
nf
file
with
the
label
'bedtools'
will
use
these
directives
by
default
withLabel
:
bash
{
container
=
'gmillot/bash-extended_v4.0:gitlab_v8.0'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'3G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
alien_trimmer
{
container
=
'gmillot/alien_trimmer_v0.4.0:gitlab_v8.1'
//
no
most
recent
at
20210930
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'3G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
fastqc
{
container
=
'evolbioinfo/fastqc:v0.11.8'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
r_ext
{
container
=
'gmillot/r_v4.0.5_extended_v2.0:gitlab_v6.4'
cpus
=
1
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'64G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
bowtie2
{
container
=
'gmillot/bowtie2_v2.3.4.3_extended_v2.0:gitlab_v8.0'
cpus
=
12
//
only
used
when
name
=
"local"
in
the
executor
part
above
memory
=
'64G'
//
only
used
when
name
=
"local"
in
the
executor
part
above
}
withLabel
:
samtools
{
container
=
'gmillot/samtools_v1.14:gitlab_v8.0'
cpus
=
1
memory
=
'1G'
}
withLabel
:
bedtools
{
container
=
'gmillot/bedtools_v2.30.0:gitlab_v8.0'
cpus
=
12
memory
=
'64G'
}
//
all
the
processes
of
the
main
.
nf
file
with
the
label
'bedtools'
will
use
these
directives
by
default
withLabel
:
gatk
{
//
scratch
=
true
container
=
'broadinstitute/gatk:4.1.9.0'
memory
=
'60G'
if
(
system_exec
==
'slurm'
){
queue
= {
task
.
attempt
>
1
?
"$normalqueue"
:
"$fastqueue"
}
clusterOptions
= {
task
.
attempt
>
1
?
"$normalqos $add_options"
:
"$fastqos $add_options"
}
}
}
withLabel
:
bwa
{
container
=
"evolbioinfo/bwa:v0.7.17"
cpus
=
20
memory
=
'30G'
}
withLabel
:
bcftools
{
container
=
"evolbioinfo/bcftools:f27f849"
cpus
=
1
memory
=
'10G'
}
withLabel
:
multiqc
{
container
=
'ewels/multiqc:1.10.1'
errorStrategy
=
'ignore'
cpus
=
1
}
}
////////
end
directives
\ No newline at end of file
nextflow.config
View file @
a12b9b1b
...
...
@@ -20,15 +20,15 @@
env
{
////
path
and
files
git_path
=
"https://gitlab.pasteur.fr/gmillot/14985_loot/"
//
in_path
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset"
//
in_path
=
"/mnt/share/14985_loot/dataset/B2699/00_Rawdata"
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/4"
//
where
initial
fastq
file
is
//
in_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata"
//
where
initial
fastq
file
is
in_path
=
"$baseDir/dataset"
//
in_path
=
"$baseDir/dataset/B4985/4"
//
where
initial
fastq
file
is
//
in_path
=
"$baseDir/dataset/B2699/00_Rawdata"
//
where
initial
fastq
file
is
//
fastq_file
=
"test.fastq.gz"
//
fastq
file
name
//
fastq_file
=
"Pool-B2699_S1_L001_R1_001.fastq.gz"
fastq_file
=
"4-4_S1_L001_R1_001.fastq.gz"
//
primer_fasta
=
"/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
primer_fasta
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//
list
of
primers
used
for
the
library
and
used
by
Alien
trimmer
to
trim
the
raw
reads
fastq_file
=
"test.fastq2.gz"
//
fastq_file
=
"4-4_S1_L001_R1_001.fastq.gz"
//
primer_fasta
=
"$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
primer_fasta
=
"$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//
list
of
primers
used
for
the
library
and
used
by
Alien
trimmer
to
trim
the
raw
reads
//
primer_fasta
=
"/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
////
end
path
and
files
...
...
@@ -47,7 +47,8 @@ env {
added_nb
=
3
//
number
of
nucleotides
taken
after
fivep_seq_nb
for
graphic
display
,
to
see
that
the
frequency
of
each
base
tends
toward
0
.
25
after
fivep_seq_nb
on
the
graph
////
end
fivep_filtering
cutoff_nb
=
25
//
reads
of
length
cutoff_nb
after
trimming
are
removed
ref_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
path
of
the
reference
genome
ref_path
=
"$baseDir/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
path
of
the
reference
genome
//
ref_path
=
"/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
//
path
of
the
reference
genome
ref_file
=
"Ecoli-K12-MG1655_ORI_CENTERED.fasta"
//
name
of
the
reference
genome
fasta
file
ori_coord
=
"2320711 2320942"
// [
2320711
,
2320942
] //
Ecoli
centered
coordinates
ter_coord
=
"4627368 4627400"
//[
4627368
,
4627400
] //
Ecoli
centered
coordinates
...
...
@@ -65,10 +66,10 @@ env {
////////
variables
that
will
be
used
below
(
and
potentially
in
the
main
.
nf
file
)
////
must
be
also
exported
system_exec
=
'
slurm
'
//
the
system
that
runs
the
workflow
.
Either
'local'
or
'slurm'
system_exec
=
'
local
'
//
the
system
that
runs
the
workflow
.
Either
'local'
or
'slurm'
//
docker_exe
=
true
//
true
for
docker
and
false
for
singularity
//
out_path
=
"/mnt/c/Users/Gael/Desktop"
//
where
the
report
file
will
be
saved
.
Example
report_path
=
'.'
for
where
the
main
.
nf
run
is
executed
or
report_path
=
'/mnt/c/Users/Gael/Desktop'