diff --git a/src/CountMinSketch.hpp b/src/CountMinSketch.hpp index 908b3894d0b31d199ebafa8990244f12b97add5f..a7814d24da68c240e1c8c1741a003940d5d93ff8 100644 --- a/src/CountMinSketch.hpp +++ b/src/CountMinSketch.hpp @@ -286,58 +286,34 @@ template<typename T> int CountMinSketch<T>::isBeneathMinKappa(const T_read_numer return res; } -/* - * Go through the arrays of the CMS and returns the number of distinct k_mers (smallest nbr of non-zero counters amongst all arrays) - */ -/*template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() { - int j; - unsigned int min=UINT_MAX; - unsigned int h; - for (j=lambda-1;j>=0;--j) { - unsigned int nb_k_mers=0; - for (h=UINT_MAX-1;h>0;--h) { // Have to process the case of h=0 separately otherwise as h is now an unsigned int, it is always >=0 which causes an infinite loop. - //std::cout<<h<<std::endl; - (cms_lambda_array[j] [h]>0)? nb_k_mers+=1: nb_k_mers; - } - (cms_lambda_array[j] [0]>0)? nb_k_mers+=1: nb_k_mers; - (nb_k_mers<min) ?min=nb_k_mers: min; - } - return min; -}*/ /* * Go through the arrays of the CMS and returns an estimation of the number of distinct k_mers (new formula from Alexis) */ template<typename T> unsigned long CountMinSketch<T>::getNbDistinctKMers() { int j; - unsigned int max=0; + unsigned long max=0; unsigned int h; unsigned long n; - unsigned long m=Pi_js[0]; + unsigned long m; - //std::cout<<"lambda="<<lambda<<std::endl; for (j=lambda-1;j>=0;--j) { + m=Pi_js[j]; unsigned long z=0; // number of zeroes in a CMS array. for (h=Pi_js[j]-1;h>0;--h) { // Have to process the case of h=0 separately otherwise as h is now an unsigned int, it is always >=0 which causes an infinite loop. (cms_lambda_array[j] [h]==0)? z+=1: z; } (cms_lambda_array[j] [0]==0)? z+=1: z; - (z>max)? max=z: max; - (max==z)? m=Pi_js[j]:m; + double lnz=log(z); + double lnm=log(m); + double lnm1=log(m-1); + double deno=lnm1-lnm; + double nume=lnz-lnm; + n=nume/deno; + (n>max)?max=n:max; } - //std::cout<<"max="<<max<<std::endl; - //std::cout<<"m="<<m<<std::endl; - double lnz=log(max); - double lnm=log(m); - double lnm1=log(m-1); - double deno=lnm1-lnm; - double nume=lnz-lnm; - /*std::cout<<"nume="<<nume<<std::endl; - std::cout<<"deno="<<deno<<std::endl; - std::cout<<"n before conversion"<<nume/deno<<std::endl;*/ - n=nume/deno; - return n; + return max; } #endif /* COUNTMINSKETCH_HPP_ */