From 2cf1abc1c089e6952014518abb5b8ccfb53f07bf Mon Sep 17 00:00:00 2001 From: Veronique Legrand <vlegrand@pasteur.fr> Date: Mon, 11 Apr 2016 17:23:21 +0200 Subject: [PATCH] bugfix : now increment the counter for only the biggest hash value obtained for a k-mer and its reverse complement. Before, I incemented both counters and it is not what we want. --- src/CountMinSketch.cpp | 13 +++++++------ src/CountMinSketch.h | 2 +- src/unit_test_cms.cpp | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/CountMinSketch.cpp b/src/CountMinSketch.cpp index d164eb6..349d351 100644 --- a/src/CountMinSketch.cpp +++ b/src/CountMinSketch.cpp @@ -61,17 +61,17 @@ const int CountMinSketch::Pi_js[500]={2147469629, 2147469637, 2147469659, 214746 -void CountMinSketch::addKMer(unsigned long val) { - int h,j; +void CountMinSketch::addKMer(const unsigned long& val1,const unsigned long& val2) { + int h1,h2,h,j; // short cnt; unsigned char cnt; //unsigned short min=ushortmask; for (j=0;j<lambda;j++) { - h=hash64to32(val,j); + h1=hash64to32(val1,j); + h2=hash64to32(val2,j); + (h1>h2)?h=h1:h=h2; cnt=cms_lambda_array[j] [h]; cnt++; - //if (cnt<min) min=cnt; - // cms_lambda_array[j] [h]=(cnt & ushortmask); cms_lambda_array[j] [h]=(cnt & ubytemask); } } @@ -102,7 +102,8 @@ int CountMinSketch::addRead(const readNumericValues& read_val) { if (keep_r) { readNumericValues::const_iterator it; for (it=read_val.begin();it!=read_val.end();it++) { - addKMer(*it); + /* Here, note that k-mer go by pair : 1rst 1 is k_mer and next one is its reverse complement*/ + addKMer(*it,*(++it)); } } return keep_r; diff --git a/src/CountMinSketch.h b/src/CountMinSketch.h index a20272c..491a9f1 100644 --- a/src/CountMinSketch.h +++ b/src/CountMinSketch.h @@ -64,7 +64,7 @@ class CountMinSketch { // std::map<int,int> getIthArray(int); // utility method provided for testing only. - void addKMer(unsigned long); // inline? TODO: see later if it can help us gain time. + void addKMer(const unsigned long&,const unsigned long&); // inline? TODO: see later if it can help us gain time. int isRCovBelowThres(const readNumericValues& read_val, const int& threshold); diff --git a/src/unit_test_cms.cpp b/src/unit_test_cms.cpp index 99242aa..42a136b 100644 --- a/src/unit_test_cms.cpp +++ b/src/unit_test_cms.cpp @@ -36,7 +36,7 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { int rej_expected=0; int ret; for (i=0;i<num;i++) { - cms.addKMer(num); + cms.addKMer(num,num-1); } // Now, check that our k-mer is present in the CMS. int min=cms.getEstimatedNbOcc(num); -- GitLab