Commit fb383cb6 authored by Gael MILLOT
Browse files

Merge branch 'master' of gitlab.pasteur.fr:gmillot/14985_loot

parents a632a752 d48fa3f3
......@@ -19,7 +19,7 @@ input_file=$1
output_file=$2
log=$3
echo -e "<br /><br />\n\n### Removal for reads made of N only\n\n" >> ${log}
echo -e "<br /><br />\n\n### Removal for reads made of N only\n\n" > ${log}
zcat ${input_file} | awk '{lineKind=(NR-1)%4;}lineKind==0{record=$0; next}lineKind==1{toGet=!($0~/^N*$/); if(toGet) print record}toGet' | gzip -c > ${output_file}
# warning: with no output dir for log.txt, the file is created in \\wsl$\Ubuntu-20.04\home\gael\work\35\b826898b7be994ff13b7bc73bc88d8\
# get the bad sequences + 3 other lines of the fastq #see https://stackoverflow.com/questions/11793942/delete-lines-before-and-after-a-match-in-bash-with-sed-or-awk
......
......@@ -21,7 +21,7 @@ log=$4
echo -e "<br /><br />\n\n### Selection of reads over ${nb} bases\n\n" >> ${log}
echo -e "<br /><br />\n\n### Selection of reads over ${nb} bases\n\n" > ${log}
# cutoff
awk -v var1=${nb} '{lineKind=(NR-1)%4}lineKind==0{record=$0; next}lineKind==1{toGet=(length($0)>=var1); if(toGet) print record}toGet' ${input_file} > ${output_file}_cutoff.fq
......
......@@ -28,7 +28,7 @@ log=$5
# log="report.rmd"
echo -e "<br /><br />\n\n### Removal of duplicates using the 5\' and 3\' coordinates\n\n" >> ${log}
echo -e "<br /><br />\n\n### Removal of duplicates using the 5\' and 3\' coordinates\n\n" > ${log}
SAMPLE_NAME=${input_file%.*} # recover the name of the file without extension
# check that no BX:Z: TAG already exists
......
......@@ -27,7 +27,7 @@ attc_seq=$7
log=$8
echo -e "<br /><br />\n\n### Selection of reads with the attC in 5'\n\n" >> ${log}
echo -e "<br /><br />\n\n### Selection of reads with the attC in 5'\n\n" > ${log}
# fastq filtering
awk -v var1=${fivep_seq_filtering} '
{lineKind=(NR-1)%4;}
......
......@@ -21,7 +21,7 @@ primer_fasta=$3
alientrimmer_l_param=$4
log=$5
echo -e "<br /><br />\n\n### Trim of the read for the primer parts\n\n" >> ${log}
echo -e "<br /><br />\n\n### Trim of the read for the primer parts\n\n" > ${log}
# sed '/^>.*$/d' ref_seq/adapters_TruSeq_B2699.fasta > tempo.adap.seq #in case we want to remove titles of the fasta files, but no need for AlienTrimmer
gzip ${input_file} -dc > input_file2
AlienTrimmer -i input_file2 -c ${primer_fasta} -o ${output_file} -l ${alientrimmer_l_param} | tee tempo.txt
......
......@@ -21,13 +21,13 @@ env {
//in_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset"
//in_path="/mnt/share/14985_loot/dataset/B2699/00_Rawdata"
//in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/3" // where initial fastq file is
in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/"
in_path="$baseDir/dataset/"
//in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata" // where initial fastq file is
fastq_file="test.fastq2.gz" // fastq file name
//fastq_file="4-4_S1_L001_R1_001.fastq.gz"
//fastq_file="3-4_S1_L001_R1_001.fastq.gz"
//primer_fasta="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
primer_fasta="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads
primer_fasta="$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads
//primer_fasta="/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//// end path and files
......@@ -46,7 +46,7 @@ env {
//// end fivep_filtering
cutoff_nb=25 // reads shorter than cutoff_nb bases after trimming are removed
//ref_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/"
ref_path="/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome
ref_path="$baseDir/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome
ref_file="Ecoli-K12-MG1655_ORI_CENTERED.fasta" // fasta file of the reference genome
ori_coord="2320711 2320942" // [2320711, 2320942] // Ecoli centered coordinates
ter_coord="4627368 4627400" //[4627368, 4627400] // Ecoli centered coordinates
......@@ -67,7 +67,7 @@ env {
//// must be also exported
system_exec = 'slurm' // the system that runs the workflow. Either 'local' or 'slurm'
//out_path="/mnt/c/Users/Gael/Desktop" // where the report file will be saved. Example report_path = '.' for where the main.nf run is executed or report_path = '/mnt/c/Users/Gael/Desktop'
out_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results" // where the report file will be saved. Example report_path = '.' for where the main.nf run is executed or report_path = '/mnt/c/Users/Gael/Desktop'
out_path="$baseDir/results/" // where the report file will be saved. Example: out_path = '.' for the directory where the main.nf run is executed, or out_path = '/mnt/c/Users/Gael/Desktop'
//// end must be also exported
//// general variables
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment