Commit f6ed03f2 authored by Amine GHOZLANE

Update galaxy approach and tars help

parent a39b705a
......@@ -11,38 +11,15 @@ workflow.onError = {
println "Oops .. something went wrong"
}
params.help=false
// Print the command-line help, one line per supported option.
// Defaults are written as literals here because the help check that calls this
// function runs before the params.* defaults further down are assigned.
def usage() {
    println("blast_approach.nf --in <csv_file> --kronaout <output_krona> --resumeout <output_resume> --annotationout <output_annotation> --cpus <nb_cpus> -w <output_temp>")
    println("--in Directory containing fastq files (Single end only). Full path is mandatory")
    println("--kronaout Output krona file (default ${baseDir}/krona.html)")
    println("--resumeout Output resume file (default ${baseDir}/resume_table.tsv)")
    println("--annotationout Output annotation file (default ${baseDir}/annotation_table.tsv)")
    println("--countout Output count file (default ${baseDir}/count_table.tsv)")
    // Matches the params.cpus default assigned below (6); the help used to say 2.
    println("--cpus Number of cpus for process (default 6)")
    println("-w Temporary output (usually /pasteur/scratch/username)")
    println("--identity Read minimum identity with blast in percent (Default 50)")
    println("--coverage Read minimum coverage with blast in percent (Default 50)")
    // The diamond thresholds have their own parameters (params.dia_identity, ...);
    // they were previously listed under the blast flag names, which made the help ambiguous.
    println("--dia_identity Read minimum identity with diamond in percent (Default 40)")
    println("--dia_coverage Read minimum coverage with diamond in percent (Default 40)")
    println("--evalue E-value threshold (Default 1E-3)")
}
// When params.help is set, print the usage text and stop the run with exit status 1.
if(params.help){
usage()
exit(1)
}
// General parameters
// Default input location (value of --in when not given on the command line).
params.in="${baseDir}/test/"
// Default number of CPUs.
params.cpus = 6
params.mail = "amine.ghozlane@pasteur.fr"
// Default result files, located in the pipeline base directory.
params.kronaout = "${baseDir}/krona.html"
params.resumeout = "${baseDir}/resume_table.tsv"
params.annotationout = "${baseDir}/annotation_table.tsv"
params.countout = "${baseDir}/count_table.tsv"
// Default output directory, followed by the four result files expressed relative to it.
// params.out must be assigned before the lines that interpolate it.
// NOTE(review): these four assignments repeat the ones above with a different base
// directory — confirm which set is meant to be effective.
params.out = "${HOME}/"
params.kronaout = "${params.out}/krona.html"
params.resumeout = "${params.out}/resume_table.tsv"
params.annotationout = "${params.out}/annotation_table.tsv"
params.countout = "${params.out}/count_table.tsv"
// Annotation parameters
// Minimum identity thresholds; the usage text describes them as percentages for
// blast (identity) and diamond (dia_identity).
params.identity = 50
params.dia_identity = 40
......@@ -52,7 +29,7 @@ params.evalue = 1E-3
params.hit = 1
params.wordsize = 28
params.minlength = 35
params.memory_mbma = 60000
params.memory_mbma = 30000
// Databases
params.alienseq = "$baseDir/databases/alienTrimmerPF8contaminants.fasta"
params.genomes = "$baseDir/databases/genomes_resume.fasta"
......@@ -62,15 +39,47 @@ params.phi = "/local/databases/index/bowtie/2.1.0/phiX.fa"
params.nt = "/local/databases/fasta/nt"
params.taxadb = "/local/databases/rel/taxadb/current/db/taxadb_full.sqlite"
params.nrdb = "/pasteur/scratch/amine/nr.dmnd"
params.out = "$baseDir/"
myDir = file(params.out)
myDir.mkdirs()
params.help=false
// Print the command-line help: one line per option together with its current default.
// Every default is interpolated from params, so the text reflects the values the
// pipeline will actually use (including anything overridden on the command line).
def usage() {
    println("animalerie-wf.nf --in <input_dir> --out <output_dir>")
    // NOTE(review): the active fastqChannel definition passes --in to splitCsv(sep: "\t"),
    // i.e. reads it as a tab-separated file — confirm whether this description is still accurate.
    println("--in Directory containing fastq files (Single end only, in format .fastq or .fastq.gz, default ${params.in}).")
    println("--out Output directory (default ${params.out}). ")
    // Show the effective output paths rather than re-deriving them from params.out:
    // the pipeline builds them as "${params.out}/<file>", so the former
    // "${params.out}<file>" rendering was wrong whenever --out had no trailing
    // slash, and it ignored an explicit --kronaout/--resumeout/... override.
    println("--kronaout Output krona file (default ${params.kronaout})")
    println("--resumeout Output resume file (default ${params.resumeout})")
    println("--annotationout Output annotation file (default ${params.annotationout})")
    println("--countout Output count file (default ${params.countout})")
    println("--cpus Number of cpus for process (default ${params.cpus})")
    println("-w Temporary output (usually /pasteur/scratch/animalerie-wf)")
    println("--identity Read minimum identity with blast in percent (Default ${params.identity})")
    println("--coverage Read minimum coverage with blast in percent (Default ${params.coverage})")
    println("--dia_identity Read minimum identity with diamond in percent (Default ${params.dia_identity})")
    println("--dia_coverage Read minimum coverage with diamond in percent (Default ${params.dia_coverage})")
    println("--evalue E-value threshold (Default ${params.evalue})")
}
// When params.help is set, print the usage text and stop the run with exit status 1.
if(params.help){
usage()
exit(1)
}
// Build the input channel from the file given by --in.
// The file is parsed as tab-separated rows, rows are grouped on their first
// column, and for each group only the first value of the second column is kept.
// The run aborts with exit status 1 when the path yields nothing.
fastqChannel = Channel
    .fromPath("${params.in}")
    .ifEmpty { exit 1, "Cannot find read file: ${params.in}" }
    .splitCsv(sep: "\t")
    .groupTuple()
    .map { grouped ->
        def key = grouped[0]
        def firstValue = grouped[1][0]
        [key, firstValue]
    }
/*fastqChannel = Channel
.fromPath("${params.in}/*.{fastq,fastq.gz}")
.map{
file -> tuple(file.baseName,file)
}
.ifEmpty { exit 1, "Cannot find any reads matching: ${params.in}" }*/
process fastqfiltering {
......@@ -79,6 +88,7 @@ process fastqfiltering {
input:
set sample_id, reads from fastqChannel
//set sample_id, file(reads) from fastqChannel
output:
set sample_id, file("*_notmapped.fastq") into filteringChannel
......@@ -97,8 +107,18 @@ process fastqfiltering {
bowtie2 -p !{params.cpus} --sensitive-local -x !{params.phi}\
-U !{sample_id}_notmapped_mouse.fastq -S /dev/null \
--un !{sample_id}_notmapped.fastq > !{sample_id}_mapping_phi.txt 2>&1
nb_raw=\$(echo \$((`wc -l < !{reads}` / 4)))
echo -e "Number of raw reads\t\$nb_raw" > !{sample_id}_resume_nb_reads.txt
case "!{reads}" in
*.gz )
nb_raw=\$(echo \$((\$(gunzip -c !{reads} | wc -l) / 4)))
echo -e "Number of raw reads\t\$nb_raw" > !{sample_id}_resume_nb_reads.txt
;;
*.fastq)
nb_raw=\$(echo \$((`wc -l < !{reads}` / 4)))
echo -e "Number of raw reads\t\$nb_raw" > !{sample_id}_resume_nb_reads.txt
;;
esac
nb_filt=\$(echo \$((`wc -l < !{sample_id}_notmapped.fastq` / 4)))
echo -e "Number of reads after filtering\t\$nb_filt" >> !{sample_id}_resume_nb_reads.txt
"""
......@@ -194,7 +214,7 @@ process mbma_krona {
for samp in \$(head -n1 !{counts} | cut -f 3- -d \$'\t'); do
tail -n +2 !{counts} | cut -f \$((2 + \$i)),1 -d \$'\t' | awk -F \$'\t' '{print \$2, \$1}' OFS=\$'\t' > \${samp}_mbma_count.txt
i=\$((\$i + 1))
done
done
"""
}
......@@ -263,10 +283,10 @@ process taxonomy {
ExtractNCBIDB2.py -f !{nt} -g !{sample_id}_taxonomy.txt -nb 1 \
-o !{sample_id}_annotation.txt
# Get sequence not annotated
if [ -f !{sample_id}_annotation.txt ]
if [ -f "!{sample_id}_annotation.txt" ]
then
extract_fasta.py -q !{sample_id}_annotation.txt \
-t !{fasta} -n -o !{sample_id}_not_annotated.fasta
-t !{fasta} -n -o !{sample_id}_not_annotated.fasta
else
touch !{sample_id}_annotation.txt !{sample_id}_not_annotated.fasta
fi
......@@ -283,7 +303,7 @@ process taxonomy {
process diamond {
cpus params.cpus
memory "30G"
memory "15G"
input:
set sample_id, file(fasta), file(notannotatedfasta), file(taxblast) from notAnnotatedChannel
......@@ -386,7 +406,7 @@ process krona {
ktImportText \${files} -o res.html
"""
}
}
kronaChannel.subscribe { it.copyTo("${params.kronaout}") }
......@@ -402,10 +422,10 @@ process resume {
output:
file("resume.tsv") into resumeout
script:
"""
resume.py -i ${resmap} ${restax} ${restaxdia} -m ${map} -mg ${resmapgenome} -o resume.tsv
resume.py -i ${resmap} ${restrim} ${restax} ${restaxdia} -m ${map} -mg ${resmapgenome} -o resume.tsv
"""
}
......
......@@ -57,8 +57,8 @@ def usage() {
println("-w Temporary output (usually /pasteur/scratch/animalerie-wf)")
println("--identity Read minimum identity with blast in percent (Default ${params.identity})")
println("--coverage Read minimum coverage with blast in percent (Default ${params.coverage})")
println("--identity Read minimum identity with diamond in percent (Default ${params.dia_identity})")
println("--coverage Read minimum coverage with diamond in percent (Default ${params.dia_coverage})")
println("--dia_identity Read minimum identity with diamond in percent (Default ${params.dia_identity})")
println("--dia_coverage Read minimum coverage with diamond in percent (Default ${params.dia_coverage})")
println("--evalue E-value threshold (Default ${params.evalue})")
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment