Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ROCK
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Véronique LEGRAND
ROCK
Commits
3f5b4fb0
Commit
3f5b4fb0
authored
9 years ago
by
Veronique Legrand
Browse files
Options
Downloads
Patches
Plain Diff
finished CMS component
parent
d29df2fe
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/CountMinSketch.cpp
+56
-29
56 additions, 29 deletions
src/CountMinSketch.cpp
src/CountMinSketch.h
+28
-17
28 additions, 17 deletions
src/CountMinSketch.h
src/unit_test_cms.cpp
+1
-9
1 addition, 9 deletions
src/unit_test_cms.cpp
with
85 additions
and
55 deletions
src/CountMinSketch.cpp
+
56
−
29
View file @
3f5b4fb0
...
...
@@ -10,38 +10,65 @@
const
int
max_pow
=
30
;
/* This method is used to determine if a number is a prime number or not.
* It is incomplete. TODO find an effective method to get lambda prime numbers once we are sure whether we use the
* "prime number" version of the hash functions. */
/*int CountMinSketch::isPrime(unsigned int num) {
if ((num % 2 ==0) || (num==2)) return 0;
if ((num % 3 ==0) || (num==3)) return 0;
if ((num % 5 ==0) || (num==5)) return 0;
if ((num % 7 ==0) || (num==7)) return 0;
return 1;
}
int CountMinSketch::isMersenne(unsigned int num) {
int cur_pow=max_pow;
unsigned int mers_nbr=pow(2,cur_pow)-1;
while (num!=mers_nbr && cur_pow>=1) {
cur_pow-=1;
mers_nbr=pow(2,cur_pow)-1;
}
if (cur_pow==0) return 0;
else return 1;
// Non-Mersenne prime numbers used as moduli for modulo hashing are stored in this array.
int
Pi_js
[
500
]
=
{
2147469629
,
2147469637
,
2147469659
,
2147469679
,
2147469703
,
2147469781
,
2147469817
,
2147469823
,
2147469829
,
2147469881
,
\
2147469917
,
2147469943
,
2147469949
,
2147469983
,
2147470007
,
2147470019
,
2147470027
,
2147470043
,
2147470057
,
2147470067
,
\
2147470081
,
2147470111
,
2147470123
,
2147470139
,
2147470147
,
2147470177
,
2147470183
,
2147470211
,
2147470229
,
2147470249
,
\
2147470313
,
2147470327
,
2147470333
,
2147470361
,
2147470427
,
2147470453
,
2147470511
,
2147470513
,
2147470529
,
2147470531
,
\
2147470553
,
2147470579
,
2147470597
,
2147470603
,
2147470627
,
2147470643
,
2147470673
,
2147470679
,
2147470723
,
2147470727
,
\
2147470733
,
2147470751
,
2147470769
,
2147470771
,
2147483059
,
2147483069
,
2147483077
,
2147483123
,
2147483137
,
2147483171
,
\
2147473897
,
2147473921
,
2147473963
,
2147474009
,
2147474027
,
2147474029
,
2147474071
,
2147474093
,
2147474113
,
2147474123
,
\
2147474149
,
2147474159
,
2147474201
,
2147474213
,
2147474239
,
2147474279
,
2147474359
,
2147474383
,
2147474393
,
2147474477
,
\
2147474479
,
2147474491
,
2147474513
,
2147474519
,
2147474531
,
2147474551
,
2147474597
,
2147474627
,
2147474657
,
2147474711
,
\
2147474717
,
2147474789
,
2147474803
,
2147474807
,
2147474809
,
2147474831
,
2147474837
,
2147474843
,
2147474851
,
2147474881
,
\
2147474887
,
2147474891
,
2147474921
,
2147474929
,
2147474947
,
2147474951
,
2147474963
,
2147475047
,
2147475061
,
2147475103
,
\
2147475107
,
2147475149
,
2147475179
,
2147475181
,
2147475193
,
2147475203
,
2147475221
,
2147475229
,
2147475233
,
2147475251
,
\
2147475257
,
2147475269
,
2147475277
,
2147475331
,
2147475347
,
2147475349
,
2147475367
,
2147475373
,
2147475397
,
2147475401
,
\
2147475413
,
2147475439
,
2147475481
,
2147475487
,
2147475497
,
2147475503
,
2147475509
,
2147475521
,
2147475541
,
2147475553
,
\
2147475559
,
2147475563
,
2147475587
,
2147475593
,
2147475601
,
2147475641
,
2147475653
,
2147475691
,
2147475713
,
2147475721
,
\
2147475739
,
2147475787
,
2147475791
,
2147475797
,
2147475829
,
2147475851
,
2147475859
,
2147475871
,
2147475899
,
2147475929
,
\
2147475971
,
2147475973
,
2147475977
,
2147475997
,
2147476031
,
2147476073
,
2147476087
,
2147476109
,
2147476127
,
2147476139
,
\
2147476141
,
2147476169
,
2147476183
,
2147476211
,
2147476249
,
2147476291
,
2147476321
,
2147476327
,
2147476367
,
2147476381
,
\
2147476399
,
2147476417
,
2147476517
,
2147476519
,
2147476543
,
2147476607
,
2147476619
,
2147476649
,
2147476663
,
2147476687
,
\
2147476693
,
2147476699
,
2147476739
,
2147476741
,
2147476763
,
2147476769
,
2147476777
,
2147476789
,
2147476819
,
2147476823
,
\
2147476841
,
2147476871
,
2147476897
,
2147476927
,
2147476931
,
2147476937
,
2147476943
,
2147476951
,
2147476963
,
2147476979
,
\
2147477021
,
2147477029
,
2147477063
,
2147477093
,
2147477107
,
2147477113
,
2147477159
,
2147477191
,
2147477201
,
2147477203
,
\
2147477207
,
2147477209
,
2147477237
,
2147477249
,
2147477273
,
2147477323
,
2147477393
,
2147477399
,
2147477419
,
2147477443
,
\
2147477467
,
2147477473
,
2147477503
,
2147477513
,
2147477531
,
2147477533
,
2147477599
,
2147477627
,
2147477681
,
2147477687
,
\
2147477699
,
2147477701
,
2147477737
,
2147477807
,
2147477809
,
2147477833
,
2147477851
,
2147477861
,
2147477873
,
2147477879
,
\
2147477881
,
2147477933
,
2147477953
,
2147477989
,
2147478013
,
2147478017
,
2147478049
,
2147478079
,
2147478083
,
2147483179
,
\
2147478089
,
2147478127
,
2147478133
,
2147478149
,
2147478253
,
2147478259
,
2147478293
,
2147478299
,
2147478331
,
2147478349
,
\
2147478373
,
2147478461
,
2147478481
,
2147478491
,
2147478497
,
2147478503
,
2147478517
,
2147478521
,
2147478563
,
2147478569
,
\
2147478581
,
2147478601
,
2147478611
,
2147478647
,
2147478649
,
2147478653
,
2147478659
,
2147478661
,
2147478673
,
2147478701
,
\
2147478703
,
2147478719
,
2147478721
,
2147478727
,
2147478731
,
2147478733
,
2147478763
,
2147478791
,
2147478821
,
2147478859
,
\
2147478863
,
2147478889
,
2147478899
,
2147478911
,
2147478919
,
2147478937
,
2147478959
,
2147478961
,
2147478967
,
2147478997
,
\
2147479013
,
2147479031
,
2147479057
,
2147479063
,
2147479079
,
2147479091
,
2147479097
,
2147479121
,
2147479129
,
2147479133
,
\
2147479171
,
2147479189
,
2147479231
,
2147479259
,
2147479273
,
2147479307
,
2147479339
,
2147479349
,
2147479361
,
2147479381
,
\
2147479403
,
2147479421
,
2147479447
,
2147479489
,
2147479507
,
2147479513
,
2147479517
,
2147479531
,
2147479547
,
2147479549
,
\
2147479573
,
2147479589
,
2147479601
,
2147479619
,
2147479637
,
2147479643
,
2147479657
,
2147479681
,
2147479751
,
2147479753
,
\
2147479757
,
2147479781
,
2147479787
,
2147479819
,
2147479823
,
2147479879
,
2147479891
,
2147479897
,
2147479907
,
2147479937
,
\
2147479991
,
2147480009
,
2147480011
,
2147480039
,
2147480161
,
2147480197
,
2147480207
,
2147480219
,
2147480227
,
2147480297
,
\
2147480299
,
2147480311
,
2147480327
,
2147480369
,
2147480429
,
2147480437
,
2147480459
,
2147480471
,
2147480507
,
2147480519
,
\
2147480527
,
2147480551
,
2147480591
,
2147480611
,
2147480623
,
2147480641
,
2147480651
,
2147480677
,
2147480683
,
2147480707
,
\
2147480723
,
2147480743
,
2147480747
,
2147480791
,
2147480837
,
2147480843
,
2147480849
,
2147480893
,
2147480897
,
2147480899
,
\
2147480921
,
2147480927
,
2147480941
,
2147480957
,
2147480969
,
2147480971
,
2147480989
,
2147481019
,
2147481031
,
2147481053
,
\
2147481071
,
2147481139
,
2147481143
,
2147481151
,
2147481173
,
2147481179
,
2147481199
,
2147481209
,
2147481247
,
2147481263
,
\
2147481269
,
2147481283
,
2147481311
,
2147481317
,
2147481337
,
2147481353
,
2147481359
,
2147481367
,
2147481373
,
2147481487
,
\
2147481491
,
2147481499
,
2147481509
,
2147481529
,
2147481563
,
2147481571
,
2147481629
,
2147481673
,
2147481793
,
2147481797
,
\
2147481811
,
2147481827
,
2147481863
,
2147481883
,
2147481893
,
2147481899
,
2147481901
,
2147481907
,
2147481937
,
2147481949
,
\
2147481967
,
2147481997
,
2147482021
,
2147482063
,
2147482081
,
2147482091
,
2147482093
,
2147482121
,
2147482223
,
2147482231
,
\
2147482237
,
2147482273
,
2147482291
,
2147482327
,
2147482343
,
2147482349
,
2147482361
,
2147482367
,
2147482409
,
2147482417
,
\
2147482481
,
2147482501
,
2147482507
,
2147482577
,
2147482583
,
2147482591
,
2147482621
,
2147482661
,
2147482663
,
2147482681
,
\
2147482693
,
2147482697
,
2147482739
,
2147482763
,
2147482801
,
2147482811
,
2147482817
,
2147482819
,
2147482859
,
2147482867
,
\
2147482873
,
2147482877
,
2147482921
,
2147482937
,
2147482943
,
2147482949
,
2147482951
,
2147483029
,
2147483033
,
2147483053
};
/*
 * Modulo-based hash: reduces w modulo the value stored at Pi_js[j-1].
 * j is 1-based (j == 1 selects Pi_js[0]); no bounds check is done on j.
 * The remainder is returned converted to int.
 */
int CountMinSketch::hash64to32(unsigned long w, int j) {
    return w % Pi_js[j - 1];
}
void CountMinSketch::findNonMersPrime() {
int i;
unsigned int num=pi_j_max;
for (i=0;i<lambda;i++) {
num-=1;
while (!isPrime(num)) num-=1;
}
}*/
std
::
map
<
int
,
int
>
CountMinSketch
::
getIthArray
(
int
i
)
{
std
::
map
<
int
,
int
>
tmp
;
return
tmp
;
...
...
This diff is collapsed.
Click to expand it.
src/CountMinSketch.h
+
28
−
17
View file @
3f5b4fb0
...
...
@@ -11,11 +11,16 @@
#include
<vector>
#include
<map>
typedef
std
::
vector
<
unsigned
long
>
readNumericValues
;
// TODO move this definition to a common include file between ReadProcessor and CountMinSketch.
#include
"rock_commons.h"
// Bundle of the three integer parameters taken by the CountMinSketch
// constructor that accepts a CMSparams; that constructor forwards them to
// init() in the order lambda, kappa, kappa_prime.
struct CMSparams {
    int lambda;       // presumably the number of arrays in the sketch (init() loops j < lambda) - confirm
    int kappa;        // NOTE(review): meaning not visible in this view - confirm
    int kappa_prime;  // NOTE(review): meaning not visible in this view - confirm
};
class
CountMinSketch
{
static
const
unsigned
int
pi_j_max
=
2147483647
;
static
const
unsigned
long
mask1
=
1
;
static
const
unsigned
long
mask1
=
1
;
// used only for hash64to32bs
static
const
unsigned
long
mask2
=
2095103
;
static
const
unsigned
long
mask3
=
1023
;
...
...
@@ -29,12 +34,12 @@ class CountMinSketch {
typedef
std
::
map
<
int
,
short
>
internal_array
;
std
::
vector
<
internal_array
>
cms_lambda_array
;
std
::
vector
<
int
>
pi_j_array
;
// std::vector<int> pi_j_array;
// void findNonMersPrime(); // fills pi_j_array with lambda non mersenne prime numbers.
// int hash64to32(unsigned long,int);
int
hash64to32
(
unsigned
long
,
int
);
int
hash64to32
(
unsigned
long
w
,
int
j
)
{
// bit shift version of hash function to start.
int
hash64to32
bs
(
unsigned
long
w
,
int
j
)
{
// bit shift version of hash function to start.
unsigned
long
h_tmp
;
unsigned
long
h
=~
w
;
h
+=
w
<<
18
;
...
...
@@ -57,14 +62,7 @@ class CountMinSketch {
void
addKMer
(
unsigned
long
);
// inline? TODO: see later if it can help us gain time.
int
isRCovBelowThres
(
const
readNumericValues
&
read_val
,
int
threshold
)
;
// for unit tests.
friend
void
test_CMS
(
int
lambda
,
int
kappa
,
int
kappa_prime
);
/*friend void test_findNonMersPrime(int lambda,int kappa,int kappa_prime);
friend void test_hash();*/
public
:
CountMinSketch
(
int
glambda
,
int
gkappa
,
int
gkappa_prime
)
{
void
init
(
int
glambda
,
int
gkappa
,
int
gkappa_prime
)
{
lambda
=
glambda
;
kappa
=
gkappa
;
kappa_prime
=
gkappa_prime
;
...
...
@@ -74,8 +72,21 @@ public:
for
(
j
=
0
;
j
<
lambda
;
j
++
)
{
cms_lambda_array
.
push_back
(
cpt_array
);
}
pi_j_array
.
reserve
(
lambda
);
// findNonMersPrime();
}
// for unit tests.
friend
void
test_CMS
(
int
lambda
,
int
kappa
,
int
kappa_prime
);
/*
friend void test_hash();*/
public
:
CountMinSketch
(
int
glambda
,
int
gkappa
,
int
gkappa_prime
)
{
init
(
glambda
,
gkappa
,
gkappa_prime
);
}
CountMinSketch
(
CMSparams
parms
)
{
init
(
parms
.
lambda
,
parms
.
kappa
,
parms
.
kappa_prime
);
}
...
...
This diff is collapsed.
Click to expand it.
src/unit_test_cms.cpp
+
1
−
9
View file @
3f5b4fb0
...
...
@@ -26,15 +26,7 @@ void test_hash(int lambda,int kappa,int kappa_prime) {
}
}
void test_findNonMersPrime(int lambda,int kappa,int kappa_prime) {
CountMinSketch cms=CountMinSketch(lambda,kappa,kappa_prime);
assert(cms.pi_j_array.size()==lambda);
std::vector<int>::iterator it;
for (it=cms.pi_j_array.begin();it!=cms.pi_j_array.end();it++) {
assert(*it<CountMinSketch::pi_j_max);
}
assert(int (cms.pi_j_array[lambda-1])==2747483641);
}
*/
void
test_CMS
(
int
lambda
,
int
kappa
,
int
kappa_prime
)
{
CountMinSketch
cms
=
CountMinSketch
(
lambda
,
kappa
,
kappa_prime
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment