Skip to content
Snippets Groups Projects
Commit 3f5b4fb0 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

finished CMS component

parent d29df2fe
No related branches found
No related tags found
No related merge requests found
......@@ -10,38 +10,65 @@
const int max_pow=30;
/* This method is used to determine whether a number is a prime number or not.
* It is incomplete. TODO: find an efficient method to get lambda prime numbers once we are sure whether we use the
* "prime number" version of the hash functions. */
/*int CountMinSketch::isPrime(unsigned int num) {
if ((num % 2 ==0) || (num==2)) return 0;
if ((num % 3 ==0) || (num==3)) return 0;
if ((num % 5 ==0) || (num==5)) return 0;
if ((num % 7 ==0) || (num==7)) return 0;
return 1;
}
int CountMinSketch::isMersenne(unsigned int num) {
int cur_pow=max_pow;
unsigned int mers_nbr=pow(2,cur_pow)-1;
while (num!=mers_nbr && cur_pow>=1) {
cur_pow-=1;
mers_nbr=pow(2,cur_pow)-1;
}
if (cur_pow==0) return 0;
else return 1;
// The non-Mersenne prime numbers used for modulo hashing are stored in this array.
int Pi_js[500]={
2147469629, 2147469637, 2147469659, 2147469679, 2147469703, 2147469781, 2147469817, 2147469823, 2147469829, 2147469881,\
2147469917, 2147469943, 2147469949, 2147469983, 2147470007, 2147470019, 2147470027, 2147470043, 2147470057, 2147470067,\
2147470081, 2147470111, 2147470123, 2147470139, 2147470147, 2147470177, 2147470183, 2147470211, 2147470229, 2147470249,\
2147470313, 2147470327, 2147470333, 2147470361, 2147470427, 2147470453, 2147470511, 2147470513, 2147470529, 2147470531,\
2147470553, 2147470579, 2147470597, 2147470603, 2147470627, 2147470643, 2147470673, 2147470679, 2147470723, 2147470727,\
2147470733, 2147470751, 2147470769, 2147470771,2147483059, 2147483069, 2147483077, 2147483123, 2147483137, 2147483171,\
2147473897, 2147473921, 2147473963, 2147474009, 2147474027, 2147474029, 2147474071, 2147474093, 2147474113, 2147474123,\
2147474149, 2147474159, 2147474201, 2147474213, 2147474239, 2147474279, 2147474359, 2147474383, 2147474393, 2147474477,\
2147474479, 2147474491, 2147474513, 2147474519, 2147474531, 2147474551, 2147474597, 2147474627, 2147474657, 2147474711,\
2147474717, 2147474789, 2147474803, 2147474807, 2147474809, 2147474831, 2147474837, 2147474843, 2147474851, 2147474881,\
2147474887, 2147474891, 2147474921, 2147474929, 2147474947, 2147474951, 2147474963, 2147475047, 2147475061, 2147475103,\
2147475107, 2147475149, 2147475179, 2147475181, 2147475193, 2147475203, 2147475221, 2147475229, 2147475233, 2147475251,\
2147475257, 2147475269, 2147475277, 2147475331, 2147475347, 2147475349, 2147475367, 2147475373, 2147475397, 2147475401,\
2147475413, 2147475439, 2147475481, 2147475487, 2147475497, 2147475503, 2147475509, 2147475521, 2147475541, 2147475553,\
2147475559, 2147475563, 2147475587, 2147475593, 2147475601, 2147475641, 2147475653, 2147475691, 2147475713, 2147475721,\
2147475739, 2147475787, 2147475791, 2147475797, 2147475829, 2147475851, 2147475859, 2147475871, 2147475899, 2147475929,\
2147475971, 2147475973, 2147475977, 2147475997, 2147476031, 2147476073, 2147476087, 2147476109, 2147476127, 2147476139,\
2147476141, 2147476169, 2147476183, 2147476211, 2147476249, 2147476291, 2147476321, 2147476327, 2147476367, 2147476381,\
2147476399, 2147476417, 2147476517, 2147476519, 2147476543, 2147476607, 2147476619, 2147476649, 2147476663, 2147476687,\
2147476693, 2147476699, 2147476739, 2147476741, 2147476763, 2147476769, 2147476777, 2147476789, 2147476819, 2147476823,\
2147476841, 2147476871, 2147476897, 2147476927, 2147476931, 2147476937, 2147476943, 2147476951, 2147476963, 2147476979,\
2147477021, 2147477029, 2147477063, 2147477093, 2147477107, 2147477113, 2147477159, 2147477191, 2147477201, 2147477203,\
2147477207, 2147477209, 2147477237, 2147477249, 2147477273, 2147477323, 2147477393, 2147477399, 2147477419, 2147477443,\
2147477467, 2147477473, 2147477503, 2147477513, 2147477531, 2147477533, 2147477599, 2147477627, 2147477681, 2147477687,\
2147477699, 2147477701, 2147477737, 2147477807, 2147477809, 2147477833, 2147477851, 2147477861, 2147477873, 2147477879,\
2147477881, 2147477933, 2147477953, 2147477989, 2147478013, 2147478017, 2147478049, 2147478079, 2147478083, 2147483179,\
2147478089, 2147478127, 2147478133, 2147478149, 2147478253, 2147478259, 2147478293, 2147478299, 2147478331, 2147478349,\
2147478373, 2147478461, 2147478481, 2147478491, 2147478497, 2147478503, 2147478517, 2147478521, 2147478563, 2147478569,\
2147478581, 2147478601, 2147478611, 2147478647, 2147478649, 2147478653, 2147478659, 2147478661, 2147478673, 2147478701,\
2147478703, 2147478719, 2147478721, 2147478727, 2147478731, 2147478733, 2147478763, 2147478791, 2147478821, 2147478859,\
2147478863, 2147478889, 2147478899, 2147478911, 2147478919, 2147478937, 2147478959, 2147478961, 2147478967, 2147478997,\
2147479013, 2147479031, 2147479057, 2147479063, 2147479079, 2147479091, 2147479097, 2147479121, 2147479129, 2147479133,\
2147479171, 2147479189, 2147479231, 2147479259, 2147479273, 2147479307, 2147479339, 2147479349, 2147479361, 2147479381,\
2147479403, 2147479421, 2147479447, 2147479489, 2147479507, 2147479513, 2147479517, 2147479531, 2147479547, 2147479549,\
2147479573, 2147479589, 2147479601, 2147479619, 2147479637, 2147479643, 2147479657, 2147479681, 2147479751, 2147479753,\
2147479757, 2147479781, 2147479787, 2147479819, 2147479823, 2147479879, 2147479891, 2147479897, 2147479907, 2147479937,\
2147479991, 2147480009, 2147480011, 2147480039, 2147480161, 2147480197, 2147480207, 2147480219, 2147480227, 2147480297,\
2147480299, 2147480311, 2147480327, 2147480369, 2147480429, 2147480437, 2147480459, 2147480471, 2147480507, 2147480519,\
2147480527, 2147480551, 2147480591, 2147480611, 2147480623, 2147480641, 2147480651, 2147480677, 2147480683, 2147480707,\
2147480723, 2147480743, 2147480747, 2147480791, 2147480837, 2147480843, 2147480849, 2147480893, 2147480897, 2147480899,\
2147480921, 2147480927, 2147480941, 2147480957, 2147480969, 2147480971, 2147480989, 2147481019, 2147481031, 2147481053,\
2147481071, 2147481139, 2147481143, 2147481151, 2147481173, 2147481179, 2147481199, 2147481209, 2147481247, 2147481263,\
2147481269, 2147481283, 2147481311, 2147481317, 2147481337, 2147481353, 2147481359, 2147481367, 2147481373, 2147481487,\
2147481491, 2147481499, 2147481509, 2147481529, 2147481563, 2147481571, 2147481629, 2147481673, 2147481793, 2147481797,\
2147481811, 2147481827, 2147481863, 2147481883, 2147481893, 2147481899, 2147481901, 2147481907, 2147481937, 2147481949,\
2147481967, 2147481997, 2147482021, 2147482063, 2147482081, 2147482091, 2147482093, 2147482121, 2147482223, 2147482231,\
2147482237, 2147482273, 2147482291, 2147482327, 2147482343, 2147482349, 2147482361, 2147482367, 2147482409, 2147482417,\
2147482481, 2147482501, 2147482507, 2147482577, 2147482583, 2147482591, 2147482621, 2147482661, 2147482663, 2147482681,\
2147482693, 2147482697, 2147482739, 2147482763, 2147482801, 2147482811, 2147482817, 2147482819, 2147482859, 2147482867,\
2147482873, 2147482877, 2147482921, 2147482937, 2147482943, 2147482949, 2147482951, 2147483029, 2147483033, 2147483053};
int CountMinSketch::hash64to32(unsigned long w,int j) {
int pi_j=Pi_js[j-1];
return w % pi_j;
}
void CountMinSketch::findNonMersPrime() {
int i;
unsigned int num=pi_j_max;
for (i=0;i<lambda;i++) {
num-=1;
while (!isPrime(num)) num-=1;
}
}*/
std::map<int,int> CountMinSketch::getIthArray(int i) {
std::map<int,int> tmp;
return tmp;
......
......@@ -11,11 +11,16 @@
#include <vector>
#include <map>
typedef std::vector<unsigned long> readNumericValues; // TODO move this definition to a common include file between ReadProcessor and CountMinSketch.
#include "rock_commons.h"
typedef struct {
int lambda;
int kappa;
int kappa_prime;
} CMSparams;
class CountMinSketch {
static const unsigned int pi_j_max=2147483647;
static const unsigned long mask1=1;
static const unsigned long mask1=1; // used only for hash64to32bs
static const unsigned long mask2=2095103;
static const unsigned long mask3=1023;
......@@ -29,12 +34,12 @@ class CountMinSketch {
typedef std::map<int,short> internal_array;
std::vector<internal_array> cms_lambda_array;
std::vector<int> pi_j_array;
// std::vector<int> pi_j_array;
// void findNonMersPrime(); // fills pi_j_array with lambda non mersenne prime numbers.
// int hash64to32(unsigned long,int);
int hash64to32(unsigned long,int);
int hash64to32(unsigned long w,int j) { // bit shift version of hash function to start.
int hash64to32bs(unsigned long w,int j) { // bit shift version of hash function to start.
unsigned long h_tmp;
unsigned long h=~w;
h+=w<<18;
......@@ -57,14 +62,7 @@ class CountMinSketch {
void addKMer(unsigned long); // inline? TODO: see later if it can help us gain time.
int isRCovBelowThres(const readNumericValues& read_val,int threshold) ;
// for unit tests.
friend void test_CMS(int lambda,int kappa,int kappa_prime);
/*friend void test_findNonMersPrime(int lambda,int kappa,int kappa_prime);
friend void test_hash();*/
public:
CountMinSketch(int glambda,int gkappa,int gkappa_prime) {
void init(int glambda,int gkappa,int gkappa_prime) {
lambda=glambda;
kappa=gkappa;
kappa_prime=gkappa_prime;
......@@ -74,8 +72,21 @@ public:
for (j=0; j<lambda;j++) {
cms_lambda_array.push_back(cpt_array);
}
pi_j_array.reserve(lambda);
// findNonMersPrime();
}
// for unit tests.
friend void test_CMS(int lambda,int kappa,int kappa_prime);
/*
friend void test_hash();*/
public:
CountMinSketch(int glambda,int gkappa,int gkappa_prime) {
init(glambda,gkappa,gkappa_prime);
}
CountMinSketch(CMSparams parms) {
init(parms.lambda,parms.kappa,parms.kappa_prime);
}
......
......@@ -26,15 +26,7 @@ void test_hash(int lambda,int kappa,int kappa_prime) {
}
}
void test_findNonMersPrime(int lambda,int kappa,int kappa_prime) {
CountMinSketch cms=CountMinSketch(lambda,kappa,kappa_prime);
assert(cms.pi_j_array.size()==lambda);
std::vector<int>::iterator it;
for (it=cms.pi_j_array.begin();it!=cms.pi_j_array.end();it++) {
assert(*it<CountMinSketch::pi_j_max);
}
assert(int (cms.pi_j_array[lambda-1])==2747483641);
}
*/
void test_CMS(int lambda,int kappa,int kappa_prime) {
CountMinSketch cms=CountMinSketch(lambda,kappa,kappa_prime);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment