Skip to content
Snippets Groups Projects
Commit 4d3ed748 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

changed default value for parameter k; it is now 25; also did some cleaning

parent a8876fa8
No related branches found
No related tags found
No related merge requests found
...@@ -62,7 +62,6 @@ void ROCKparams::optArgConsistency(const string& input_file,const string& output ...@@ -62,7 +62,6 @@ void ROCKparams::optArgConsistency(const string& input_file,const string& output
usage(EXIT_FAILURE); usage(EXIT_FAILURE);
} }
if (nb_k_mers) { // user indicated number of k-mers; use it to minimize parms.lambda if (nb_k_mers) { // user indicated number of k-mers; use it to minimize parms.lambda
//parms.lambda=getBestLambdaForN(nb_k_mers,parms.lambda);
int smallest_kappa; int smallest_kappa;
if (parms.kappa_prime==0) smallest_kappa=parms.kappa; if (parms.kappa_prime==0) smallest_kappa=parms.kappa;
else smallest_kappa=parms.kappa_prime; else smallest_kappa=parms.kappa_prime;
...@@ -259,16 +258,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { ...@@ -259,16 +258,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
std::vector<string> v_input_lines; std::vector<string> v_input_lines;
std::vector<string> v_output_lines; std::vector<string> v_output_lines;
static int PE_separately=1; static int PE_separately=1;
/*
static struct option long_options[] =
{
{"separate_PE", no_argument, &PE_as_single,0},
{0,0,0,0}
};*/
//int option_index=0;
// while((i = getopt_long(argc, argv, "i:o:l:k:c:C:g:n:vq:m:",long_options,&option_index)) != -1) {
while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:")) != -1) { while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:")) != -1) {
switch(i) { switch(i) {
case 0: case 0:
...@@ -304,24 +294,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { ...@@ -304,24 +294,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
break; break;
case 'n': case 'n':
nb_k_mers=atoi(optarg);break; // number of distinct k-mers nb_k_mers=atoi(optarg);break; // number of distinct k-mers
/* case 'g':
// cout<<optarg<<endl;
g=atoi(optarg);
break;*/
case 'v': case 'v':
verbose_mode=1; verbose_mode=1;
break; break;
/* case 'p':
if (strlen(optarg)>1) {
cout<<"value for -p option must be 0, 1 or 2";
usage(EXIT_FAILURE);
}
PE_separately=atoi(optarg);
if (PE_separately<0 or PE_separately>2) {
cout<<"value for -p option must be 0, 1 or 2";
usage(EXIT_FAILURE);
}
break;*/
case 'q': case 'q':
q=atoi(optarg); q=atoi(optarg);
if (q<0) { if (q<0) {
...@@ -342,12 +317,6 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { ...@@ -342,12 +317,6 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
default: default:
usage(EXIT_FAILURE); break; } usage(EXIT_FAILURE); break; }
} }
/*if (!PE_as_single && qual_thres.min_correct_k_mers_in_read!=1) {
cout<<"Incompatible options: when PE are processed independently, minimum number of correct k-mers in each end is 1."<<endl;
usage(EXIT_FAILURE);
}*/
//parms.filter_PE_separately=PE_separately;
//cout<<PE_as_single<<endl;
processMainArgs(optind, argc, argv,v_input_lines); processMainArgs(optind, argc, argv,v_input_lines);
optArgConsistency(input_file,output_file,g,parms,nb_k_mers,v_input_lines); optArgConsistency(input_file,output_file,g,parms,nb_k_mers,v_input_lines);
if (nb_k_mers) { if (nb_k_mers) {
......
...@@ -87,7 +87,7 @@ public: ...@@ -87,7 +87,7 @@ public:
ROCKparams() { ROCKparams() {
f_id=0; // to generate id of input/output fastq files. +1=total number of files. f_id=0; // to generate id of input/output fastq files. +1=total number of files.
g=0; g=0;
k=30; k=25;
nb_k_mers=0; nb_k_mers=0;
parms.kappa=70; parms.kappa=70;
parms.kappa_prime=5; parms.kappa_prime=5;
......
...@@ -69,7 +69,7 @@ std::string checkDirExists(const string o_file) { ...@@ -69,7 +69,7 @@ std::string checkDirExists(const string o_file) {
* logarithm of the gamma function; * logarithm of the gamma function;
* from here: https://www.cec.uchile.cl/cinetica/pcordero/MC_libros/NumericalRecipesinC.pdf * from here: https://www.cec.uchile.cl/cinetica/pcordero/MC_libros/NumericalRecipesinC.pdf
*/ */
//#include <stdio.h>
double gammln(const unsigned long& xx) { double gammln(const unsigned long& xx) {
double x,y,tmp,ser; double x,y,tmp,ser;
...@@ -134,17 +134,6 @@ int getBestLambdaForN(const unsigned long& nb_k_mers,const int& smallest_kappa,c ...@@ -134,17 +134,6 @@ int getBestLambdaForN(const unsigned long& nb_k_mers,const int& smallest_kappa,c
return min_l; return min_l;
} }
/*
int getBestLambdaForN_altn(const unsigned long& nb_k_mers,const int& smallest_kappa,const unsigned int& m,const float& max_collision_proba) {
float ccdfr=ccdf(smallest_kappa,nb_k_mers,m);
int l=0;
float p=ccdfr;
while(l<50 and p>max_collision_proba) {
p*=ccdfr;
l++;
}
return l-1;
}*/
...@@ -159,13 +148,11 @@ void usage(int status) { ...@@ -159,13 +148,11 @@ void usage(int status) {
cout<<" -i <file> .... file name containing the list of the FASTQ files to process."<<endl; cout<<" -i <file> .... file name containing the list of the FASTQ files to process."<<endl;
cout<<" -o <file> .... file name containing the list of the output FASTQ file names."<<endl; cout<<" -o <file> .... file name containing the list of the output FASTQ file names."<<endl;
cout<<" -h .... Print this message and exit."<<endl; cout<<" -h .... Print this message and exit."<<endl;
cout<<" -k <int> .... k-mer size. (default 30)."<<endl; cout<<" -k <int> .... k-mer size. (default 25)."<<endl;
cout<<" -c <int> .... lower coverage threshold (default: 5)."<<endl; cout<<" -c <int> .... lower coverage threshold (default: 5)."<<endl;
cout<<" -C <int> .... upper coverage threshold (default: 70; max: 65535)."<<endl; cout<<" -C <int> .... upper coverage threshold (default: 70; max: 65535)."<<endl;
cout<<" -l <int> .... size of the count min sketch (default: at most 4, depending on the available RAM)"<<endl; cout<<" -l <int> .... size of the count min sketch (default: at most 4, depending on the available RAM)"<<endl;
cout<<" -n <int> .... (expected) number of distinct k-mers within the processed reads."<<endl; cout<<" -n <int> .... (expected) number of distinct k-mers within the processed reads."<<endl;
// cout<<" -g <int> .... maximum RAM to use (in Gb) for the CMS."<<endl;
// cout<<" -p <int> .... treat PE as single, separately with strict -c filter, separately with lenient -c fileter. Default is 1."<<endl;
cout<<" -q <int> .... minimum quality threshold for nucleotides. Default is 10."<<endl; cout<<" -q <int> .... minimum quality threshold for nucleotides. Default is 10."<<endl;
cout<<" -m <int> .... minimum of valid k-mer threshold for reads. Default is 1."<<endl; cout<<" -m <int> .... minimum of valid k-mer threshold for reads. Default is 1."<<endl;
cout<<" -v .... verbose"<<endl; cout<<" -v .... verbose"<<endl;
......
...@@ -8,10 +8,6 @@ ...@@ -8,10 +8,6 @@
#ifndef MAIN_UTILS_H_ #ifndef MAIN_UTILS_H_
#define MAIN_UTILS_H_ #define MAIN_UTILS_H_
//#define k_max_input_files 15
/*#define k_sep_pair_end ','
#define k_ext '.'
#define path_sep '/'*/
#define k_max_collision_proba 0.1 #define k_max_collision_proba 0.1
......
...@@ -108,18 +108,18 @@ expected_diff2="> @SRR1222430.37 37 length=251 \ ...@@ -108,18 +108,18 @@ expected_diff2="> @SRR1222430.37 37 length=251 \
> A3>333BFA2FF4GBFFDGGCGED?FGEGGHGFFFEEG?AF13@50>///11B13@@1/>//B0?>////<//B/00??@/--:--.;:C000;:0/0009.-9:.00:-.;9/9...-;.--9@--9:////-9-9..////9/;//;9///.9-..--------..99.9.//////;-;--9-.////://9/9.;.-;-99-.//.;////-;?9;...9-9-----9;-.;.;/.-9.;/99=--;" > A3>333BFA2FF4GBFFDGGCGED?FGEGGHGFFFEEG?AF13@50>///11B13@@1/>//B0?>////<//B/00??@/--:--.;:C000;:0/0009.-9:.00:-.;9/9...-;.--9@--9:////-9-9..////9/;//;9///.9-..--------..99.9.//////;-;--9-.////://9/9.;.-;-99-.//.;////-;?9;...9-9-----9;-.;.;/.-9.;/99=--;"
mkdir tmp mkdir tmp
../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 140 ../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 140
../src/rock -C 100 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 141 ../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 141
ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l`
test $ret1 -eq 0 || exit 142 test $ret1 -eq 0 || exit 142
ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l`
test $ret2 -eq 0 || exit 143 test $ret2 -eq 0 || exit 143
../src/rock -C 100 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 144 ../src/rock -C 100 -k 30 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 144
../src/rock -C 100 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 145 ../src/rock -C 100 -k 30 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 145
ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.37"` ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.37"`
test $ret1 -eq 1 || exit 146 test $ret1 -eq 1 || exit 146
...@@ -140,9 +140,9 @@ echo "########################################################################## ...@@ -140,9 +140,9 @@ echo "##########################################################################
echo "testing ROCK with a quality score threshold for nucleotides and minimum number of valid k-mer to keep a read." echo "testing ROCK with a quality score threshold for nucleotides and minimum number of valid k-mer to keep a read."
mkdir tmp mkdir tmp
../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150 ../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150
../src/rock -C 100 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151 ../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151
ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l`
test $ret1 -eq 0 || exit 152 test $ret1 -eq 0 || exit 152
...@@ -159,7 +159,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` ...@@ -159,7 +159,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l`
test $ret1 -eq 0 || exit 156 test $ret1 -eq 0 || exit 156
test $ret1 -eq 0 || exit 157 test $ret1 -eq 0 || exit 157
../src/rock -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 158 ../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 158
[ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 159 [ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 159
[ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 160 [ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 160
...@@ -170,7 +170,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` ...@@ -170,7 +170,7 @@ ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l`
test $ret1 -eq 400 || exit 161 test $ret1 -eq 400 || exit 161
test $ret1 -eq 400 || exit 162 test $ret1 -eq 400 || exit 162
../src/rock -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 163 ../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 163
[ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 164 [ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 164
[ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 165 [ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 165
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment