Skip to content
Snippets Groups Projects
Commit 8969b03b authored by Veronique Legrand's avatar Veronique Legrand
Browse files

did some modif for the calculation of min number of ocurrences for each k-mer (fast path)

parent 2cf1abc1
No related branches found
No related tags found
No related merge requests found
......@@ -77,24 +77,7 @@ void CountMinSketch::addKMer(const unsigned long& val1,const unsigned long& val2
}
/*
* Used to determine if a read must be kept or not.
* Depending on the case, threshold is kappa or kappa_prime
* Computes the number of k-mers for which the number of occurrences is below threshold.
* If more than half of the k-mers have a number of occurrences that is less than threshold,
* return true; else, return false.
*/
int CountMinSketch::isRCovBelowThres(const readNumericValues& read_val,const int& threshold) {
int a=0;
int b=0;
readNumericValues::const_iterator it;
for (it=read_val.begin();it!=read_val.end();it++) {
unsigned char nb_occ=getEstimatedNbOcc(*it);
if (nb_occ<threshold) a+=1;
++b;
}
return (2*a>b);
}
int CountMinSketch::addRead(const readNumericValues& read_val) {
......
......@@ -66,7 +66,43 @@ class CountMinSketch {
// std::map<int,int> getIthArray(int); // utility method provided for testing only.
void addKMer(const unsigned long&,const unsigned long&); // inline? TODO: see later if it can help us gain time.
int isRCovBelowThres(const readNumericValues& read_val, const int& threshold);
// int isRCovBelowThres(const readNumericValues& read_val, const int& threshold);
/*
* Used to determine if a read must be kept or not.
* Depending on the case, threshold is kappa or kappa_prime
* Computes the number of k-mers for which the number of occurrences is below threshold.
* If more than half of the k-mers have a number of occurrences that is less than threshold,
* return true; else, return false.
*/
inline int isRCovBelowThres(const readNumericValues& read_val,const int& threshold) {
int a=0;
int b=0;
int j;
int h;
unsigned char min;
readNumericValues::const_iterator it;
for (it=read_val.begin();it!=read_val.end();it++) {
j=lambda;
min=ubytemask;
while (--j>=0 && min>threshold) {
h=hash64to32(*it,j);
min=cms_lambda_array[j] [h];
(min<threshold)? a+=1:a+=0;
}
++b;
}
return (2*a>b);
}
/* inline unsigned char CountMinSketch::isKmCoveBelowThres(const unsigned long& val, const int& threshold) {
int h;
unsigned char min;
int j=lambda;
while(--j>=0) {
h=hash64to32(val,j);
min=cms_lambda_array[j] [h];
}
}*/
void init(int glambda,int gkappa,int gkappa_prime) {
lambda=glambda;
......@@ -104,13 +140,15 @@ public:
free(cms_lambda_array);
}
// keep that just for unit testing purpose
inline unsigned char getEstimatedNbOcc(const unsigned long& val) {
int j,h;
// unsigned short min=ushortmask;
unsigned char min=ubytemask;
for (j=0;j<lambda;j++) {
j=lambda;
while(--j>=0 && min>kappa) {
h=hash64to32(val,j);
if (cms_lambda_array[j] [h] <min) min=cms_lambda_array[j] [h];
min=cms_lambda_array[j] [h];
}
return min;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment