Skip to content
Snippets Groups Projects
Commit ee67a769 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

bugfix: when the number of k-mer is provided in command line by the user, it...

bugfix: when the number of k-mer is provided in command line by the user, it was implicitely converted to unsigned int instead of unsigned long which led to a wrong value for large numbers of k-mets.
parent 2df87e10
No related branches found
No related tags found
No related merge requests found
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include <cmath>
#include "rock_commons.h" #include "rock_commons.h"
...@@ -288,7 +289,7 @@ template<typename T> int CountMinSketch<T>::isBeneathMinKappa(const T_read_numer ...@@ -288,7 +289,7 @@ template<typename T> int CountMinSketch<T>::isBeneathMinKappa(const T_read_numer
/* /*
* Go through the arrays of the CMS and returns the number of distinct k_mers (smallest nbr of non-zero counters amongst all arrays) * Go through the arrays of the CMS and returns the number of distinct k_mers (smallest nbr of non-zero counters amongst all arrays)
*/ */
template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() { /*template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() {
int j; int j;
unsigned int min=UINT_MAX; unsigned int min=UINT_MAX;
unsigned int h; unsigned int h;
...@@ -303,7 +304,34 @@ template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() { ...@@ -303,7 +304,34 @@ template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() {
(nb_k_mers<min) ?min=nb_k_mers: min; (nb_k_mers<min) ?min=nb_k_mers: min;
} }
return min; return min;
} }*/
/*
* Go through the arrays of the CMS and returns an estimation of the number of distinct k_mers (new formula from Alexis)
*/
template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() {
int j;
unsigned int max=0;
unsigned int h;
unsigned long n;
unsigned long m=Pi_js[0];
for (j=lambda-1;j>=0;--j) {
unsigned long z=0; // number of zeroes in a CMS array.
for (h=Pi_js[j]-1;h>0;--h) { // Have to process the case of h=0 separately otherwise as h is now an unsigned int, it is always >=0 which causes an infinite loop.
(cms_lambda_array[j] [h]==0)? z+=1: z;
}
(cms_lambda_array[j] [0]>0)? z+=1: z;
(z>max)? max=z: max;
(max==z)? m=Pi_js[j]:m;
}
unsigned long lnz=log(max);
unsigned long lnm=log(m);
unsigned long lnm1=log(n-1);
unsigned long deno=lnm1-lnm;
unsigned long nume=lnz-lnm;
n=nume/deno;
return n;
}
#endif /* COUNTMINSKETCH_HPP_ */ #endif /* COUNTMINSKETCH_HPP_ */
...@@ -43,7 +43,7 @@ void ROCKparams::processMainArgs(int optind, const int argc, char ** argv,std::v ...@@ -43,7 +43,7 @@ void ROCKparams::processMainArgs(int optind, const int argc, char ** argv,std::v
void ROCKparams::optArgConsistency(const string& input_file,const string& output_file, void ROCKparams::optArgConsistency(const string& input_file,const string& output_file,
const int& g,CMSparams& parms,const int& nb_k_mers, const int& g,CMSparams& parms,const unsigned long& nb_k_mers,
const std::vector<string>& v_input_lines) { const std::vector<string>& v_input_lines) {
if (input_file.empty() && v_input_lines.empty()) { if (input_file.empty() && v_input_lines.empty()) {
std::cout<<"You must provide filename via -i or arguments to indicate ROCK what to filter." <<std::endl; std::cout<<"You must provide filename via -i or arguments to indicate ROCK what to filter." <<std::endl;
...@@ -257,7 +257,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { ...@@ -257,7 +257,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
int i,q,m; int i,q,m;
std::vector<string> v_input_lines; std::vector<string> v_input_lines;
std::vector<string> v_output_lines; std::vector<string> v_output_lines;
static int PE_separately=1; // static int PE_separately=1;
float proba=k_max_collision_proba; float proba=k_max_collision_proba;
while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:f:")) != -1) { while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vq:m:f:")) != -1) {
...@@ -302,7 +302,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { ...@@ -302,7 +302,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) {
qual_thres.k=k; qual_thres.k=k;
break; break;
case 'n': case 'n':
nb_k_mers=atoi(optarg);break; // number of distinct k-mers char * t;
cout<<optarg<<endl;
nb_k_mers=strtoul(optarg,&t,10);break; // number of distinct k-mers
case 'v': case 'v':
verbose_mode=1; verbose_mode=1;
break; break;
......
...@@ -47,7 +47,7 @@ class ROCKparams{ ...@@ -47,7 +47,7 @@ class ROCKparams{
CMSparams parms; CMSparams parms;
FasqQualThreshold qual_thres; FasqQualThreshold qual_thres;
int g; // max RAM to use for the CMS if specified by the user. int g; // max RAM to use for the CMS if specified by the user.
int nb_k_mers; // expected number of k-mers in input data if specified by the user. unsigned long nb_k_mers; // expected number of k-mers in input data if specified by the user.
int k; // size of the k-mers int k; // size of the k-mers
int verbose_mode; int verbose_mode;
std::string input_file,output_file; std::string input_file,output_file;
...@@ -74,7 +74,7 @@ class ROCKparams{ ...@@ -74,7 +74,7 @@ class ROCKparams{
* Also set some CMS parameters depending on value of other options. * Also set some CMS parameters depending on value of other options.
*/ */
void optArgConsistency(const string& input_file,const string& output_file, void optArgConsistency(const string& input_file,const string& output_file,
const int& g,CMSparams& parms,const int& nb_k_mers, const int& g,CMSparams& parms,const unsigned long& nb_k_mers,
const std::vector<string>& v_input_lines); const std::vector<string>& v_input_lines);
friend void test_processIOFileArgs(); friend void test_processIOFileArgs();
......
...@@ -143,6 +143,8 @@ float getCollisionProba(const int& smallest_kappa,const unsigned long& nb_k_mers ...@@ -143,6 +143,8 @@ float getCollisionProba(const int& smallest_kappa,const unsigned long& nb_k_mers
return fpp; return fpp;
} }
void usage(int status) { void usage(int status) {
cout<<"usage: rock [options] [args]"<<endl<<endl; cout<<"usage: rock [options] [args]"<<endl<<endl;
cout<<" -i <file> .... file name containing the list of the FASTQ files to process."<<endl; cout<<" -i <file> .... file name containing the list of the FASTQ files to process."<<endl;
......
...@@ -85,6 +85,9 @@ void test_getBestLambdaForN() { ...@@ -85,6 +85,9 @@ void test_getBestLambdaForN() {
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1); best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);
//cout<<best<<endl; //cout<<best<<endl;
assert(best==1); assert(best==1);
best=getBestLambdaForN(nb_k_mer,1,UINT_MAX,0.05);
//cout<<best<<endl;
assert(best==8);
nb_k_mer=50000000000; nb_k_mer=50000000000;
best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);// this returns 90 best=getBestLambdaForN(nb_k_mer,5,UINT_MAX,0.1);// this returns 90
...@@ -95,7 +98,6 @@ void test_getBestLambdaForN() { ...@@ -95,7 +98,6 @@ void test_getBestLambdaForN() {
best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,0.05); best=getBestLambdaForN(nb_k_mer,50,UINT_MAX,0.05);
assert(best==1); assert(best==1);
best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,0.05); best=getBestLambdaForN(nb_k_mer,10,UINT_MAX,0.05);
assert(best==7);
} }
void test_getCollisionProba() { void test_getCollisionProba() {
...@@ -104,7 +106,7 @@ void test_getCollisionProba() { ...@@ -104,7 +106,7 @@ void test_getCollisionProba() {
p=getCollisionProba(2,5000000000,UINT_MAX,1); p=getCollisionProba(2,5000000000,UINT_MAX,1);
assert(p=0.1128); assert(p=0.1128);
p =getCollisionProba(1,2,UINT_MAX,3); p =getCollisionProba(1,2,UINT_MAX,3);
cout<<p<<endl; //cout<<p<<endl;
assert(p==0.0); assert(p==0.0);
p =getCollisionProba(1,1000000000,UINT_MAX,1); p =getCollisionProba(1,1000000000,UINT_MAX,1);
assert(floor(p*1000+0.5)==23); assert(floor(p*1000+0.5)==23);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment