Skip to content
Snippets Groups Projects
Commit 687229fe authored by Veronique Legrand's avatar Veronique Legrand
Browse files

added option -f to allow thge users to specify what maximum value of collision...

added option -f to allow thge users to specify what maximum value of collision probability they want in the CMS.
parent bdc83f07
No related branches found
No related tags found
No related merge requests found
......@@ -15,13 +15,9 @@
#include "rock_commons.h"
#define BAD_TYPE -1
typedef struct {
int lambda;
int kappa;
......@@ -30,6 +26,7 @@ typedef struct {
int filter_PE_separately; // indicates whether PE reads must be treated as single by the cms. Indeed it may appear that 1 end contains useful k-mer but that other end contains k-mer such that "global" median is below threshold.
// In this case, read is rejected by the CMS (default behavior). We want to do an experimentation to see if keeping such reads wold lead to better results in assembling.
// Values can be 0 (process PE as single),1 (process PE separately with strict implementation for kappa_prime) or 2 (process PE separately with lenient implementation for kappa prime).
float wanted_max_collision_proba;
} CMSparams;
template <typename T>
......
......@@ -65,7 +65,7 @@ void ROCKparams::optArgConsistency(const string& input_file,const string& output
int smallest_kappa;
if (parms.kappa_prime==0) smallest_kappa=parms.kappa;
else smallest_kappa=parms.kappa_prime;
parms.lambda=getBestLambdaForN(nb_k_mers,smallest_kappa,Pi_js[0],k_max_collision_proba);
parms.lambda=getBestLambdaForN(nb_k_mers,smallest_kappa,Pi_js[0],parms.wanted_max_collision_proba);
}
if (parms.kappa_prime>=parms.kappa) {
cout<<"ERROR lower filter is higher than high filter!"<<endl;
......@@ -259,7 +259,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
std::vector<string> v_output_lines;
static int PE_separately=1;
while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:")) != -1) {
while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:f:")) != -1) {
switch(i) {
case 0:
break;
......@@ -267,6 +267,13 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
input_file=optarg;break;
case 'c':
parms.kappa_prime=atoi(optarg);break;
case 'f':
float proba=atof(optarg);
if ((proba<=0.0) or (proba>=1.0)) {
cout<<"maximum for collision probability in the CMS must be > 0.0 and <1.0."<<endl;
usage(EXIT_FAILURE);
}
parms.wanted_max_collision_proba=proba;break;
case 'h':
usage(EXIT_SUCCESS); break;
case 'o':
......@@ -291,7 +298,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
cout<<"Bad value for k. Must choose k so that 0<k<=32."<<endl;
usage(EXIT_FAILURE);
}
qual_thres.k=k;
qual_thres.k=k;
break;
case 'n':
nb_k_mers=atoi(optarg);break; // number of distinct k-mers
......
......@@ -93,6 +93,7 @@ public:
parms.kappa_prime=0;
parms.lambda=0;
parms.filter_PE_separately=0;
parms.wanted_max_collision_proba=k_max_collision_proba;
qual_thres.min_correct_k_mers_in_read=1;
qual_thres.nucl_score_threshold=k_phred_32;
qual_thres.k=k;
......
......@@ -151,6 +151,7 @@ void usage(int status) {
cout<<" -k <int> .... k-mer size. (default 25)."<<endl;
cout<<" -c <int> .... lower coverage threshold (default: 0)."<<endl;
cout<<" -C <int> .... upper coverage threshold (default: 70; max: 65535)."<<endl;
cout<<" -f <float> .... maximum probability of collision in the count min sketch (default: 0.05)"<<endl;
cout<<" -l <int> .... size of the count min sketch (default: at most 4, depending on the available RAM)"<<endl;
cout<<" -n <int> .... (expected) number of distinct k-mers within the processed reads."<<endl;
cout<<" -q <int> .... minimum quality threshold for nucleotides. Default is 0."<<endl;
......
......@@ -94,14 +94,6 @@ void test_getBestLambdaForN() {
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,0.05);
assert(best==1);
/*best=getBestLambdaForN(nb_k_mer,2,UINT_MAX,0.05);
assert(best==4206);
best=getBestLambdaForN(nb_k_mer,3,UINT_MAX,0.05);
assert(best==991);
best=getBestLambdaForN(nb_k_mer,4,UINT_MAX,0.05);
assert(best==306);
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.05);
assert(best==117);*/
best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,0.05);
assert(best==7);
}
......@@ -115,14 +107,10 @@ void test_getCollisionProba() {
cout<<p<<endl;
assert(p==0.0);
p =getCollisionProba(1,1000000000,UINT_MAX,1);
//cout<<p<<endl;
//cout<<p*1000+0.5<<endl;
assert(floor(p*1000+0.5)==23);
p =getCollisionProba(5,1000000000,UINT_MAX,1);
//cout<<p<<endl;
assert(floor(p*1000+0.5)==0);
p =getCollisionProba(5,50000000000,UINT_MAX,1);
//cout<<p<<endl;
assert(floor(p*1000+0.5)==975);
}
......@@ -139,7 +127,6 @@ void test_gammln() {
a=gammln(5);
assert(exp(a)==24);
a=gammln(6);
//cout<<exp(a)<<endl;
float tmp=exp(a);
float tmp2=tmp*10000;
float tmp3=round(tmp2)/10000;
......@@ -147,14 +134,11 @@ void test_gammln() {
assert(tmp3==120);
/* unsigned long n=5000;
unsigned long m=n+1;
cout<<"m="<<m<<endl;
//float i=m;
a=gammln(m);
float b=gammln(n);
double truc=exp(a-b);
printf("truc=%lf \n",truc);
//cout<<exp(a)/exp(b)<<endl;// Too big.
//assert(exp(a)/exp(b)==n);*/
assert(truc==n);
*/
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment