diff --git a/src/ROCKparams.cpp b/src/ROCKparams.cpp index 8f1f46c6e8cd2183495d006e4439df34a61bf97f..ee2dd575f517a65d1b3f1c4e045ca0220dbd9929 100644 --- a/src/ROCKparams.cpp +++ b/src/ROCKparams.cpp @@ -62,7 +62,6 @@ void ROCKparams::optArgConsistency(const string& input_file,const string& output usage(EXIT_FAILURE); } if (nb_k_mers) { // user indicated number of k-mers; use it to minimize parms.lambda - //parms.lambda=getBestLambdaForN(nb_k_mers,parms.lambda); int smallest_kappa; if (parms.kappa_prime==0) smallest_kappa=parms.kappa; else smallest_kappa=parms.kappa_prime; @@ -259,16 +258,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { std::vector<string> v_input_lines; std::vector<string> v_output_lines; static int PE_separately=1; -/* - static struct option long_options[] = - { - {"separate_PE", no_argument, &PE_as_single,0}, - {0,0,0,0} - };*/ - - //int option_index=0; - // while((i = getopt_long(argc, argv, "i:o:l:k:c:C:g:n:vq:m:",long_options,&option_index)) != -1) { while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:")) != -1) { switch(i) { case 0: @@ -304,24 +294,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { break; case 'n': nb_k_mers=atoi(optarg);break; // number of distinct k-mers -/* case 'g': - // cout<<optarg<<endl; - g=atoi(optarg); - break;*/ case 'v': verbose_mode=1; break; -/* case 'p': - if (strlen(optarg)>1) { - cout<<"value for -p option must be 0, 1 or 2"; - usage(EXIT_FAILURE); - } - PE_separately=atoi(optarg); - if (PE_separately<0 or PE_separately>2) { - cout<<"value for -p option must be 0, 1 or 2"; - usage(EXIT_FAILURE); - } - break;*/ case 'q': q=atoi(optarg); if (q<0) { @@ -342,12 +317,6 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { default: usage(EXIT_FAILURE); break; } } - /*if (!PE_as_single && qual_thres.min_correct_k_mers_in_read!=1) { - cout<<"Incompatible options: when PE are processed independently, minimum number of correct k-mers in each end is 1."<<endl; - usage(EXIT_FAILURE); - }*/ - //parms.filter_PE_separately=PE_separately; - //cout<<PE_as_single<<endl; processMainArgs(optind, argc, argv,v_input_lines); optArgConsistency(input_file,output_file,g,parms,nb_k_mers,v_input_lines); if (nb_k_mers) { diff --git a/src/ROCKparams.h b/src/ROCKparams.h index cfc0baa2288f3e88c2480eb3a8494971bd991e0b..96652c4043587387a0f0791e83c71fdfba023b15 100644 --- a/src/ROCKparams.h +++ b/src/ROCKparams.h @@ -87,7 +87,7 @@ public: ROCKparams() { f_id=0; // to generate id of input/output fastq files. +1=total number of files. g=0; - k=30; + k=25; nb_k_mers=0; parms.kappa=70; parms.kappa_prime=5; diff --git a/src/main_utils.cpp b/src/main_utils.cpp index 58ac7c5c025995e5553cc82a4ad31bc6e5a39023..3ec1bad524ff84f7fdbf8e9b9643ce8934b09dd9 100644 --- a/src/main_utils.cpp +++ b/src/main_utils.cpp @@ -69,7 +69,7 @@ std::string checkDirExists(const string o_file) { * logarithm of the gamma function; * from here: https://www.cec.uchile.cl/cinetica/pcordero/MC_libros/NumericalRecipesinC.pdf */ -//#include <stdio.h> + double gammln(const unsigned long& xx) { double x,y,tmp,ser; @@ -134,17 +134,6 @@ int getBestLambdaForN(const unsigned long& nb_k_mers,const int& smallest_kappa,c return min_l; } -/* -int getBestLambdaForN_altn(const unsigned long& nb_k_mers,const int& smallest_kappa,const unsigned int& m,const float& max_collision_proba) { - float ccdfr=ccdf(smallest_kappa,nb_k_mers,m); - int l=0; - float p=ccdfr; - while(l<50 and p>max_collision_proba) { - p*=ccdfr; - l++; - } - return l-1; -}*/ @@ -159,13 +148,11 @@ void usage(int status) { cout<<" -i <file> .... file name containing the list of the FASTQ files to process."<<endl; cout<<" -o <file> .... file name containing the list of the output FASTQ file names."<<endl; cout<<" -h .... Print this message and exit."<<endl; - cout<<" -k <int> .... k-mer size. (default 30)."<<endl; + cout<<" -k <int> .... k-mer size. (default 25)."<<endl; cout<<" -c <int> .... lower coverage threshold (default: 5)."<<endl; cout<<" -C <int> .... upper coverage threshold (default: 70; max: 65535)."<<endl; cout<<" -l <int> .... size of the count min sketch (default: at most 4, depending on the available RAM)"<<endl; cout<<" -n <int> .... (expected) number of distinct k-mers within the processed reads."<<endl; - // cout<<" -g <int> .... maximum RAM to use (in Gb) for the CMS."<<endl; - // cout<<" -p <int> .... treat PE as single, separately with strict -c filter, separately with lenient -c fileter. Default is 1."<<endl; cout<<" -q <int> .... minimum quality threshold for nucleotides. Default is 10."<<endl; cout<<" -m <int> .... minimum of valid k-mer threshold for reads. Default is 1."<<endl; cout<<" -v .... verbose"<<endl; diff --git a/src/main_utils.h b/src/main_utils.h index d4458a9fbf41ed49f39abd66dc6f2227cfce699f..59e72e2d9c42d91a133f0917f88b5651579076e9 100644 --- a/src/main_utils.h +++ b/src/main_utils.h @@ -8,10 +8,6 @@ #ifndef MAIN_UTILS_H_ #define MAIN_UTILS_H_ -//#define k_max_input_files 15 -/*#define k_sep_pair_end ',' -#define k_ext '.' -#define path_sep '/'*/ #define k_max_collision_proba 0.1 diff --git a/test/rock_mem.sh b/test/rock_mem.sh index 6b436a60d39ec83372323388a2c26bbd0b4b225a..eaed30fcf2cab5112b75eb558e8509604b7d6a4c 100755 --- a/test/rock_mem.sh +++ b/test/rock_mem.sh @@ -108,18 +108,18 @@ expected_diff2="> @SRR1222430.37 37 length=251 \ > A3>333BFA2FF4GBFFDGGCGED?FGEGGHGFFFEEG?AF13@50>///11B13@@1/>//B0?>////<//B/00??@/--:--.;:C000;:0/0009.-9:.00:-.;9/9...-;.--9@--9:////-9-9..////9/;//;9///.9-..--------..99.9.//////;-;--9-.////://9/9.;.-;-99-.//.;////-;?9;...9-9-----9;-.;.;/.-9.;/99=--;" mkdir tmp -../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 140 +../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 140 -../src/rock -C 100 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 141 +../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 141 ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` test $ret1 -eq 0 || exit 142 ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` test $ret2 -eq 0 || exit 143 -../src/rock -C 100 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 144 +../src/rock -C 100 -k 30 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 144 -../src/rock -C 100 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 145 +../src/rock -C 100 -k 30 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 145 ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.37"` test $ret1 -eq 1 || exit 146 @@ -140,9 +140,9 @@ echo "########################################################################## echo "testing ROCK with a quality score threshold for nucleotides and minimum number of valid k-mer to keep a read." mkdir tmp -../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150 +../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150 -../src/rock -C 100 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151 +../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151 ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` test $ret1 -eq 0 || exit 152 @@ -159,7 +159,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` test $ret1 -eq 0 || exit 156 test $ret1 -eq 0 || exit 157 -../src/rock -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 158 +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 158 [ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 159 [ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 160 @@ -170,7 +170,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` test $ret1 -eq 400 || exit 161 test $ret1 -eq 400 || exit 162 -../src/rock -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 163 +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 163 [ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 164 [ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 165