diff --git a/src/CountMinSketch.cpp b/src/CountMinSketch.cpp index 349d351ca0dd20983abc33eb4d169f368d32b6a6..44138065f27ea73ff98d32f653bbf8ff07a71d2a 100644 --- a/src/CountMinSketch.cpp +++ b/src/CountMinSketch.cpp @@ -77,24 +77,7 @@ void CountMinSketch::addKMer(const unsigned long& val1,const unsigned long& val2 } -/* - * Used to determine if a read must be kept or not. - * Depending on the case, threshold is kappa or kappa_prime - * Computes the number of k-mers for which the number of occurrences is below threshold. - * If more than half of the k-mers have a number of occurrences that is less than threshold, - * return true; else, return false. - */ -int CountMinSketch::isRCovBelowThres(const readNumericValues& read_val,const int& threshold) { - int a=0; - int b=0; - readNumericValues::const_iterator it; - for (it=read_val.begin();it!=read_val.end();it++) { - unsigned char nb_occ=getEstimatedNbOcc(*it); - if (nb_occ<threshold) a+=1; - ++b; - } - return (2*a>b); -} + int CountMinSketch::addRead(const readNumericValues& read_val) { diff --git a/src/CountMinSketch.h b/src/CountMinSketch.h index 491a9f18353c211faa185c4c2a3fb97e19d0e2fe..cfba37e55118c02ea0c83ecd367e69cc5b6cc044 100644 --- a/src/CountMinSketch.h +++ b/src/CountMinSketch.h @@ -66,7 +66,43 @@ class CountMinSketch { // std::map<int,int> getIthArray(int); // utility method provided for testing only. void addKMer(const unsigned long&,const unsigned long&); // inline? TODO: see later if it can help us gain time. - int isRCovBelowThres(const readNumericValues& read_val, const int& threshold); + // int isRCovBelowThres(const readNumericValues& read_val, const int& threshold); + /* + * Used to determine if a read must be kept or not. + * Depending on the case, threshold is kappa or kappa_prime + * Computes the number of k-mers for which the number of occurrences is below threshold. + * If more than half of the k-mers have a number of occurrences that is less than threshold, + * return true; else, return false. + */ + inline int isRCovBelowThres(const readNumericValues& read_val,const int& threshold) { + int a=0; + int b=0; + int j; + int h; + unsigned char min; + readNumericValues::const_iterator it; + for (it=read_val.begin();it!=read_val.end();it++) { + j=lambda; + min=ubytemask; + while (--j>=0 && min>threshold) { + h=hash64to32(*it,j); + min=cms_lambda_array[j] [h]; + (min<threshold)? a+=1:a+=0; + } + ++b; + } + return (2*a>b); + } + + /* inline unsigned char CountMinSketch::isKmCoveBelowThres(const unsigned long& val, const int& threshold) { + int h; + unsigned char min; + int j=lambda; + while(--j>=0) { + h=hash64to32(val,j); + min=cms_lambda_array[j] [h]; + } + }*/ void init(int glambda,int gkappa,int gkappa_prime) { lambda=glambda; @@ -104,13 +140,15 @@ public: free(cms_lambda_array); } + // keep that just for unit testing purpose inline unsigned char getEstimatedNbOcc(const unsigned long& val) { int j,h; // unsigned short min=ushortmask; unsigned char min=ubytemask; - for (j=0;j<lambda;j++) { + j=lambda; + while(--j>=0 && min>kappa) { h=hash64to32(val,j); - if (cms_lambda_array[j] [h] <min) min=cms_lambda_array[j] [h]; + min=cms_lambda_array[j] [h]; } return min; }