Skip to content
Snippets Groups Projects
Commit 8da988de authored by Veronique Legrand's avatar Veronique Legrand
Browse files

work in progess for new way of computing lamda

parent 4dabc5c3
No related branches found
No related tags found
No related merge requests found
......@@ -114,7 +114,7 @@ double ccdf(const int& smallest_kappa,const unsigned long& nb_k_mers,const unsig
if (res<0) res=0.00000001; // It happens because pmf if an approximation of the probability distribution.
// So it happens that ccdf is negative but very close to 0... As we need a non null value for the next calculation,
// return a very small constant.
if (res==1) res=0.9999999; // Need to do that because we compule ln(ccdf) to determine lamda and we need it to be non null otherwise we'd get a division by 0.
if (res>=0.999) res=0.999; // Need to do that because we compule ln(ccdf) to determine lamda and we need it to be non null otherwise we'd get a division by 0.
return res;
}
......@@ -134,6 +134,18 @@ int getBestLambdaForN(const unsigned long& nb_k_mers,const int& smallest_kappa,c
return min_l;
}
/*
int getBestLambdaForN_altn(const unsigned long& nb_k_mers,const int& smallest_kappa,const unsigned int& m,const float& max_collision_proba) {
float ccdfr=ccdf(smallest_kappa,nb_k_mers,m);
int l=0;
float p=ccdfr;
while(l<50 and p>max_collision_proba) {
p*=ccdfr;
l++;
}
return l-1;
}*/
float getCollisionProba(const int& smallest_kappa,const unsigned long& nb_k_mers,const unsigned int& arr_cms_size,const int& lambda) {
......
......@@ -25,6 +25,7 @@
unsigned long getNodePhysMemory();
std::string checkDirExists(const std::string o_file);
int getBestLambdaForN(const unsigned long& nb_k_mers,const int& smallest_kappa,const unsigned int& m,const float& max_collision_proba);
int getBestLambdaForN_altn(const unsigned long& nb_k_mers,const int& smallest_kappa,const unsigned int& m,const float& max_collision_proba);
double gammln(const unsigned long& xx);
double pmf(const float&x,const unsigned long& nb_k_mers,const unsigned int& m);
double ccdf(const int& smallest_kappa,const unsigned long& nb_k_mers,const unsigned int& m);
......
......@@ -359,9 +359,10 @@ int main(int argc, char **argv) {
cout<<"testing CMS with lambda="<<lambda<<endl;
test_CMS(lambda,kappa,kappa_prime); // Finally using C arrays (maps implied storing hash keys : 4 Bytes per k_mer overhead) but each array is of size INT_MAX...
cout<<"testing CMS with PE not as single strict kappa prime implementation"<<endl;
testCMS_with_PE_NotasSingle(lambda,kappa,kappa_prime);
// VL comment these tests since AC says That the best option is to treat PE as single.
/*testCMS_with_PE_NotasSingle(lambda,kappa,kappa_prime);
cout<<"testing CMS with PE not as single lenient kappa prime implementation"<<endl;
testCMS_with_PE_NotasSingle2(lambda,kappa,kappa_prime);
testCMS_with_PE_NotasSingle2(lambda,kappa,kappa_prime);*/
cout<<"done"<<endl;
// profiling_CMS_fill();
......
......@@ -73,30 +73,37 @@ void test_getBestLambdaForN() {
int best=getBestLambdaForN(nb_k_mer,2,UINT_MAX,0.05);
assert(best==2);
nb_k_mer=200000000;
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,k_max_collision_proba);
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);
assert(best==1);
nb_k_mer=600000000;
best=getBestLambdaForN(nb_k_mer,70,UINT_MAX,k_max_collision_proba);
best=getBestLambdaForN(nb_k_mer,70,UINT_MAX,0.1);
assert(best==1);
nb_k_mer=2000000000;
best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,k_max_collision_proba);
best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,0.1);
assert(best==1);
nb_k_mer=10000000000;
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,k_max_collision_proba);
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);
//cout<<best<<endl;
assert(best==1);
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,k_max_collision_proba);
cout<<best<<endl;
//assert(best==1);
nb_k_mer=100000000000;
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,k_max_collision_proba);
cout<<best<<endl;
nb_k_mer=1000000000000;
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,k_max_collision_proba);
cout<<best<<endl;
nb_k_mer=100000000000000;
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,k_max_collision_proba);
cout<<best<<endl; // VL: get 19315484
nb_k_mer=50000000000;
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);// this returns 90
assert(best==90);
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,0.1);
assert(best==1);
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,0.05);
assert(best==1);
best=getBestLambdaForN(nb_k_mer,2,UINT_MAX,0.05);
assert(best==4206);
best=getBestLambdaForN(nb_k_mer,3,UINT_MAX,0.05);
assert(best==991);
best=getBestLambdaForN(nb_k_mer,4,UINT_MAX,0.05);
assert(best==306);
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.05);
assert(best==117);
best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,0.05);
assert(best==7);
}
void test_getCollisionProba() {
......@@ -108,12 +115,15 @@ void test_getCollisionProba() {
cout<<p<<endl;
assert(p==0.0);
p =getCollisionProba(1,1000000000,UINT_MAX,1);
// cout<<p<<endl;
assert(floor(p*1000+0.5)==43);
//cout<<p<<endl;
//cout<<p*1000+0.5<<endl;
assert(floor(p*1000+0.5)==23);
p =getCollisionProba(5,1000000000,UINT_MAX,1);
assert(floor(p*1000+0.5)==2);
p =getCollisionProba(5,20000000000,UINT_MAX,1);
assert(floor(p*1000+0.5)==909);
//cout<<p<<endl;
assert(floor(p*1000+0.5)==0);
p =getCollisionProba(5,50000000000,UINT_MAX,1);
//cout<<p<<endl;
assert(floor(p*1000+0.5)==975);
}
void test_gammln() {
......@@ -148,7 +158,6 @@ void test_gammln() {
}
// #include <float.h>
void test_pmf() {
unsigned long nb_k_mers=5000000000;
float p=pmf(0,nb_k_mers,UINT_MAX);
......@@ -180,10 +189,10 @@ int main() {
test_pmf();
cout<<"testing the ccdf function"<<endl;
test_ccdf();
cout<<"test computation of the bast lambda value depending on nb distinct k-mers."<<endl;
test_getBestLambdaForN();
cout<<"testing computation of collision probability."<<endl;
test_getCollisionProba();
cout<<"test computation of the bast lambda value depending on nb distinct k-mers."<<endl;
test_getBestLambdaForN();
cout<<"done"<<endl;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment