Skip to content
Snippets Groups Projects
Commit e1215c12 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

unit tests for count min sketch updated accordingly to the modifs done for the -p option.

parent 6430cf9f
No related branches found
No related tags found
No related merge requests found
......@@ -68,8 +68,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) {
void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
int filter_PE_as_single=0;
CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_as_single);
// int filter_PE_as_single=0;
int filter_PE_separately=1;
CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_separately);
int i;
cout<<"size of the CMS component: "<<sizeof(cms)<<endl;
int num=200;
......@@ -100,11 +101,11 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
assert(ret==0);
// Now, change PE1 a little and insert it kappa times in the CMS
for (int j=1;j<kappa;j++) {
for (int j=1;j<=kappa;j++) {
read_values.single_or_PE_val[2]=j;
ret=cms->addRead(read_values);
if (j<kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times.
else assert(ret>0); //otherwise it is rejected.
if (j<=kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times.
else assert(ret==0); //otherwise it is rejected.
}
ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since PE1 was inserted kappa times.
......@@ -178,6 +179,120 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
delete cms;
}
/* Test new implementation of PE as single.
* In this implementation, we process PE separately and we keep a PE if med(cov(PE1)) or med((covPE2)) < C and
* med(cov(PE1)) or med(cov(PE2)<c .*/
void testCMS_with_PE_NotasSingle2(int lambda,int kappa,int kappa_prime) {
int filter_PE_separately=2;
CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_separately);
int i;
cout<<"size of the CMS component: "<<sizeof(cms)<<endl;
int num=200;
//int rej_expected=0;
int ret;
for (i=0;i<kappa;i++) {
cms->addKMer(num);
}
int min=cms->getEstimatedNbOcc(num);
assert(min==kappa);
// mimic a PE read (as given by part 2 output).
T_read_numericValues read_values;
// mimic a PE read in which PE1 contains k-mer that appear rarely in the CMS and PE2 contains k_mers that already appear kappa times in the CMS.
for (i=1;i<=200;i++) {
read_values.single_or_PE_val.push_back(i*1000-1);
}
read_values.idx_start_PE2=200;
for (i=1;i<=239;i++) {
read_values.single_or_PE_val.push_back(num);
}
read_values.single_or_PE_val.push_back(50);
read_values.single_or_PE_val.push_back(55);
ret=cms->addRead(read_values); // read should be accepted since PE1 is nor over covered.
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values); // read should be considered under covered since PE1 is under covered.
assert(ret>0);
// Now, change PE1 a little and insert it kappa times in the CMS
for (int j=1;j<=kappa;j++) {
read_values.single_or_PE_val[2]=j;
ret=cms->addRead(read_values);
if (j<=kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times.
else assert(ret==0); //otherwise it is rejected.
}
ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since PE1 was inserted kappa times.
assert(ret==0);
// mimic a PE read (as given by part 2 output).
T_read_numericValues read_values2;
// mimic a PE read that will be inserted in the CMS kappa_prime+1 times to check low filter.
for (i=1;i<=150;i++) {
read_values2.single_or_PE_val.push_back(i*1500-1);
}
read_values2.idx_start_PE2=150;
for (i=1;i<=151;i++) {
read_values2.single_or_PE_val.push_back(i*1600-1);
}
for (i=0;i<kappa_prime+1;i++) {
ret=cms->addRead(read_values2);
assert(ret>0);
}
ret=cms->isBeneathMinKappa(read_values2);
assert(ret==0);
//mimic a read for which PE1 is under kappa_prime and PE2 too. Such a read is expected to be removed by low filter
T_read_numericValues read_values3;
for (i=1;i<=200;i++) {
read_values3.single_or_PE_val.push_back(i);
}
read_values3.idx_start_PE2=201;
for (i=1;i<=205;i++) {
read_values3.single_or_PE_val.push_back(i);
}
ret=cms->addRead(read_values3);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values3);
assert(ret>0);
//mimic a read for which PE1 is over kappa_prime but not PE2
T_read_numericValues read_values4;
for (i=201;i<=399;i++) {
read_values4.single_or_PE_val.push_back(200);
}
read_values4.idx_start_PE2=199;
for (i=201;i<=399;i++) {
read_values4.single_or_PE_val.push_back(i);
}
ret=cms->addRead(read_values4);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values4);
assert(ret>0);
//mimic a read for which PE2 is over kappa_prime but not PE1
T_read_numericValues read_values5;
for (i=201;i<=399;i++) {
read_values5.single_or_PE_val.push_back(i);
}
read_values5.idx_start_PE2=199;
for (i=201;i<=399;i++) {
read_values5.single_or_PE_val.push_back(200);
}
ret=cms->addRead(read_values5);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values5);
assert(ret>0);
delete cms;
}
/*
* TODO: move this somewhere else.
* I use this test because it is in the calculation of the median that we spend the most time (approx: 50%) . I want to know what in it takes the longest to see if I can improve.
......@@ -243,8 +358,10 @@ int main(int argc, char **argv) {
cout<<"testing CMS with lambda="<<lambda<<endl;
test_CMS(lambda,kappa,kappa_prime); // Finally using C arrays (maps implied storing hash keys : 4 Bytes per k_mer overhead) but each array is of size INT_MAX...
cout<<"testing CMS with PE not as single"<<endl;
cout<<"testing CMS with PE not as single strict kappa prime implementation"<<endl;
testCMS_with_PE_NotasSingle(lambda,kappa,kappa_prime);
cout<<"testing CMS with PE not as single lenient kappa prime implementation"<<endl;
testCMS_with_PE_NotasSingle2(lambda,kappa,kappa_prime);
cout<<"done"<<endl;
// profiling_CMS_fill();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment