From 2cf1abc1c089e6952014518abb5b8ccfb53f07bf Mon Sep 17 00:00:00 2001
From: Veronique Legrand <vlegrand@pasteur.fr>
Date: Mon, 11 Apr 2016 17:23:21 +0200
Subject: [PATCH] bugfix : now increment the counter for only the biggest hash
 value obtained for a k-mer and its reverse complement. Before, I incemented
 both counters and it is not what we want.

---
 src/CountMinSketch.cpp | 13 +++++++------
 src/CountMinSketch.h   |  2 +-
 src/unit_test_cms.cpp  |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/CountMinSketch.cpp b/src/CountMinSketch.cpp
index d164eb6..349d351 100644
--- a/src/CountMinSketch.cpp
+++ b/src/CountMinSketch.cpp
@@ -61,17 +61,17 @@ const int CountMinSketch::Pi_js[500]={2147469629, 2147469637, 2147469659, 214746
 
 
 
-void CountMinSketch::addKMer(unsigned long val) {
-    int h,j;
+void CountMinSketch::addKMer(const unsigned long& val1,const unsigned long& val2) {
+    int h1,h2,h,j;
     // short cnt;
     unsigned char cnt;
     //unsigned short min=ushortmask;
     for (j=0;j<lambda;j++) {
-        h=hash64to32(val,j);
+        h1=hash64to32(val1,j);
+        h2=hash64to32(val2,j);
+        (h1>h2)?h=h1:h=h2;
         cnt=cms_lambda_array[j] [h];
         cnt++;
-        //if (cnt<min) min=cnt;
-        // cms_lambda_array[j] [h]=(cnt & ushortmask);
         cms_lambda_array[j] [h]=(cnt & ubytemask);
     }
 }
@@ -102,7 +102,8 @@ int CountMinSketch::addRead(const readNumericValues& read_val) {
     if (keep_r) {
         readNumericValues::const_iterator it;
         for (it=read_val.begin();it!=read_val.end();it++) {
-            addKMer(*it);
+            /* Here, note that k-mer go by pair : 1rst 1 is k_mer and next one is its reverse complement*/
+            addKMer(*it,*(++it));
         }
     }
     return keep_r;
diff --git a/src/CountMinSketch.h b/src/CountMinSketch.h
index a20272c..491a9f1 100644
--- a/src/CountMinSketch.h
+++ b/src/CountMinSketch.h
@@ -64,7 +64,7 @@ class CountMinSketch {
 
 
     // std::map<int,int> getIthArray(int); // utility method provided for testing only.
-    void addKMer(unsigned long); // inline? TODO: see later if it can help us gain time.
+    void addKMer(const unsigned long&,const unsigned long&); // inline? TODO: see later if it can help us gain time.
 
     int isRCovBelowThres(const readNumericValues& read_val, const int& threshold);
 
diff --git a/src/unit_test_cms.cpp b/src/unit_test_cms.cpp
index 99242aa..42a136b 100644
--- a/src/unit_test_cms.cpp
+++ b/src/unit_test_cms.cpp
@@ -36,7 +36,7 @@ void test_CMS(int lambda,int kappa,int kappa_prime) {
     int rej_expected=0;
     int ret;
     for (i=0;i<num;i++) {
-        cms.addKMer(num);
+        cms.addKMer(num,num-1);
     }
     // Now, check that our k-mer is present in the CMS.
     int min=cms.getEstimatedNbOcc(num);
-- 
GitLab