From 47cbf801a96e780bbdc602a5557d1117a12fbb10 Mon Sep 17 00:00:00 2001
From: Veronique Legrand <vlegrand@pasteur.fr>
Date: Fri, 3 Jun 2016 11:55:00 +0200
Subject: [PATCH] Improve lambda computation with option -n: default lambda_max to 500 when unset

---
 src/main_utils.cpp | 5 ++++-
 src/main_utils.h   | 2 +-
 src/rock.cpp       | 4 ++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/main_utils.cpp b/src/main_utils.cpp
index 2f3e279..0263fd7 100644
--- a/src/main_utils.cpp
+++ b/src/main_utils.cpp
@@ -139,9 +139,12 @@ int processInOutFileArgs(const std::string& input_file,const std::string output_
  * We want p<=0.01,
  * so choose smallest lambda so that [1-(1-1/m)exp n] exp lambda<=0.01
  */
-int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max) {
+int getBestLambdaForN(const unsigned long& nb_k_mers,int lambda_max) {
     int lambda=2;
     int min_lambda=lambda;
+    if (lambda_max==0) { // no upper bound specified by the user via the -g option
+        lambda_max=500;
+    }
     long double tmp=1.0/INT_MAX;
     tmp=1.0-tmp;
     tmp=pow(tmp,nb_k_mers);
diff --git a/src/main_utils.h b/src/main_utils.h
index 50ebafe..b6ce6e5 100644
--- a/src/main_utils.h
+++ b/src/main_utils.h
@@ -16,7 +16,7 @@
 
 unsigned long getNodePhysMemory();
 int processInOutFileArgs(const std::string& input_file,const std::string output_file,std::vector<IO_fq_files>& single_files,std::vector<PE_files>& v_PE_files,int& f_id);
-int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max);
+int getBestLambdaForN(const unsigned long& nb_k_mers,int lambda_max);
 float getCollisionProba(const unsigned long& nb_k_mers,const int& lambda);
 
 
diff --git a/src/rock.cpp b/src/rock.cpp
index 5d31cf4..9c9a98f 100644
--- a/src/rock.cpp
+++ b/src/rock.cpp
@@ -63,7 +63,7 @@ static void usage(int status) {
   cout<<"    -C <kappa>                   .... Specify upper threshold for coverage. Default is 50. Max is 65535."<<endl;
   cout<<"    -l <lambda>                  .... Indicate number of arrays wanted in the CMS. Default is biggest l so that l<B/(b*MAX_INT) where b is 1 or 2 depending on kappa and B is the RAM quantity on the machine."<<endl;
   cout<<"    -n <nb_distinct_k_mer>       .... Indicate the number of distinct k-mer. Useful to compute lambda if not specified with -l."<<endl;
-  cout<<"    -g <CMS size in GB>              .... Wanted size for the CMS."<<endl;
+  cout<<"    -g <CMS size in GB>          .... Wanted size for the CMS."<<endl;
   exit(status); }
 
 
@@ -94,7 +94,7 @@ void optArgConsistency(const string& input_file,const string& output_file,const
         cout<<"-l and -g options are mutually exclusive."<<endl;
         usage(EXIT_FAILURE);
     }
-    if (nb_k_mers!=0 and parms.lambda!=0) {// user set both lambda and cms size=> inconsistency.
+    if (nb_k_mers!=0 and parms.lambda!=0) {// user set both lambda and number of k-mers=> inconsistency.
         cout<<"-l and -n options are mutually exclusive."<<endl;
         usage(EXIT_FAILURE);
     }
-- 
GitLab