diff --git a/paper/paper.code.txt b/paper/paper.code.txt new file mode 100644 index 0000000000000000000000000000000000000000..94fca9129a076b83b99865e643bf855e73310fa3 --- /dev/null +++ b/paper/paper.code.txt @@ -0,0 +1,32 @@ +#################################### +## Example: procedure source code ## +#################################### + +# downloading the paired-end (PE) FASTQ files using wget +wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR207/009/SRR2079909/SRR2079909_1.fastq.gz +wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR207/009/SRR2079909/SRR2079909_2.fastq.gz + +# trimming/clipping PE FASTQ files using AlienTrimmer (https://research.pasteur.fr/en/software/alientrimmer) +echo -e ">poly-A\nAAAAAAAAAAAAAAAAAA\n>poly-C\nCCCCCCCCCCCCCCCCCC" > polys.fa +AlienTrimmer -1 SRR2079909_1.fastq.gz -2 SRR2079909_2.fastq.gz -a polys.fa -q 20 -l 100 -o SRR2079909 +rm SRR2079909.S.fastq polys.fa + +# estimating the number F0 of distinct k-mers (k=25) using ntCard (https://github.com/bcgsc/ntCard) +ntcard -t 12 -k 25 -p SRR2079909 SRR2079909.1.fastq SRR2079909.2.fastq + +# running ROCK with kappa=45 (-C) and kappa' (-c) varying from 0 to 8; the first run passes no -c +rock -k 25 -C 45 -n 105584331 SRR2079909.1.fastq,SRR2079909.2.fastq +rock -k 25 -c 2 -C 45 -n 105584331 SRR2079909.1.fastq,SRR2079909.2.fastq +rock -k 25 -c 4 -C 45 -n 105584331 SRR2079909.1.fastq,SRR2079909.2.fastq +rock -k 25 -c 6 -C 45 -n 105584331 SRR2079909.1.fastq,SRR2079909.2.fastq +rock -k 25 -c 8 -C 45 -n 105584331 SRR2079909.1.fastq,SRR2079909.2.fastq + +# running normalize-by-median (https://github.com/dib-lab/khmer) after interleaving both FASTQ files into SRR2079909.i.fastq +paste <( paste - - - - < SRR2079909.1.fastq ) <( paste - - - - < SRR2079909.2.fastq ) | tr '\t' '\n' > SRR2079909.i.fastq +normalize-by-median.py -M 128G -k 25 -C 45 -p -o SRR2079909.khmer.i.fastq SRR2079909.i.fastq + +# running BBnorm (https://sourceforge.net/projects/bbmap) +bbnorm.sh -Xmx128G in=SRR2079909.1.fastq in2=SRR2079909.2.fastq out=SRR2079909.bbnorm.1.fastq out2=SRR2079909.bbnorm.2.fastq k=25 target=45 threads=12 + +# running BigNorm 
(https://git.informatik.uni-kiel.de/axw/Bignorm) +Bignorm -1 SRR2079909.1.fastq -2 SRR2079909.2.fastq -b -d 1 -k 25 -C 45 -n