diff --git a/src/unit_test_cms.cpp b/src/unit_test_cms.cpp index 8f71278e40f98518ac2cde5968871f92c752cdaf..b77fbf459dcd99536766559970762461f7c32cc2 100644 --- a/src/unit_test_cms.cpp +++ b/src/unit_test_cms.cpp @@ -68,8 +68,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) { - int filter_PE_as_single=0; - CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_as_single); + // int filter_PE_as_single=0; + int filter_PE_separately=1; + CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_separately); int i; cout<<"size of the CMS component: "<<sizeof(cms)<<endl; int num=200; @@ -100,11 +101,11 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) { assert(ret==0); // Now, change PE1 a little and insert it kappa times in the CMS - for (int j=1;j<kappa;j++) { + for (int j=1;j<=kappa;j++) { read_values.single_or_PE_val[2]=j; ret=cms->addRead(read_values); - if (j<kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times. - else assert(ret>0); //otherwise it is rejected. + if (j<=kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times. + else assert(ret==0); //otherwise it is rejected. } ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since PE1 was inserted kappa times. @@ -178,6 +179,120 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) { delete cms; } +/* Test new implementation of PE as single. + * In this implementation, we process PE separately and we keep a PE if med(cov(PE1)) or med((covPE2)) < C and + * med(cov(PE1)) or med(cov(PE2)<c .*/ +void testCMS_with_PE_NotasSingle2(int lambda,int kappa,int kappa_prime) { + int filter_PE_separately=2; + CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_separately); + int i; + cout<<"size of the CMS component: "<<sizeof(cms)<<endl; + int num=200; + //int rej_expected=0; + int ret; + for (i=0;i<kappa;i++) { + cms->addKMer(num); + } + int min=cms->getEstimatedNbOcc(num); + assert(min==kappa); + // mimic a PE read (as given by part 2 output). + T_read_numericValues read_values; + + // mimic a PE read in which PE1 contains k-mer that appear rarely in the CMS and PE2 contains k_mers that already appear kappa times in the CMS. + for (i=1;i<=200;i++) { + read_values.single_or_PE_val.push_back(i*1000-1); + } + read_values.idx_start_PE2=200; + for (i=1;i<=239;i++) { + read_values.single_or_PE_val.push_back(num); + } + read_values.single_or_PE_val.push_back(50); + read_values.single_or_PE_val.push_back(55); + ret=cms->addRead(read_values); // read should be accepted since PE1 is nor over covered. + assert(ret>0); + + ret=cms->isBeneathMinKappa(read_values); // read should be considered under covered since PE1 is under covered. + assert(ret>0); + + // Now, change PE1 a little and insert it kappa times in the CMS + for (int j=1;j<=kappa;j++) { + read_values.single_or_PE_val[2]=j; + ret=cms->addRead(read_values); + if (j<=kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times. + else assert(ret==0); //otherwise it is rejected. + } + + ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since PE1 was inserted kappa times. + assert(ret==0); + + // mimic a PE read (as given by part 2 output). + T_read_numericValues read_values2; + + // mimic a PE read that will be inserted in the CMS kappa_prime+1 times to check low filter. + for (i=1;i<=150;i++) { + read_values2.single_or_PE_val.push_back(i*1500-1); + } + read_values2.idx_start_PE2=150; + for (i=1;i<=151;i++) { + read_values2.single_or_PE_val.push_back(i*1600-1); + } + for (i=0;i<kappa_prime+1;i++) { + ret=cms->addRead(read_values2); + assert(ret>0); + } + + ret=cms->isBeneathMinKappa(read_values2); + assert(ret==0); + + //mimic a read for which PE1 is under kappa_prime and PE2 too. Such a read is expected to be removed by low filter + T_read_numericValues read_values3; + + for (i=1;i<=200;i++) { + read_values3.single_or_PE_val.push_back(i); + } + read_values3.idx_start_PE2=201; + for (i=1;i<=205;i++) { + read_values3.single_or_PE_val.push_back(i); + } + ret=cms->addRead(read_values3); + assert(ret>0); + + ret=cms->isBeneathMinKappa(read_values3); + assert(ret>0); + + //mimic a read for which PE1 is over kappa_prime but not PE2 + T_read_numericValues read_values4; + for (i=201;i<=399;i++) { + read_values4.single_or_PE_val.push_back(200); + } + read_values4.idx_start_PE2=199; + for (i=201;i<=399;i++) { + read_values4.single_or_PE_val.push_back(i); + } + ret=cms->addRead(read_values4); + assert(ret>0); + + ret=cms->isBeneathMinKappa(read_values4); + assert(ret>0); + + //mimic a read for which PE2 is over kappa_prime but not PE1 + T_read_numericValues read_values5; + for (i=201;i<=399;i++) { + read_values5.single_or_PE_val.push_back(i); + } + read_values5.idx_start_PE2=199; + for (i=201;i<=399;i++) { + read_values5.single_or_PE_val.push_back(200); + } + ret=cms->addRead(read_values5); + assert(ret>0); + + ret=cms->isBeneathMinKappa(read_values5); + assert(ret>0); + + delete cms; +} + /* * TODO: move this somewhere else. * I use this test because it is in the calculation of the median that we spend the most time (approx: 50%) . I want to know what in it takes the longest to see if I can improve. @@ -243,8 +358,10 @@ int main(int argc, char **argv) { cout<<"testing CMS with lambda="<<lambda<<endl; test_CMS(lambda,kappa,kappa_prime); // Finally using C arrays (maps implied storing hash keys : 4 Bytes per k_mer overhead) but each array is of size INT_MAX... - cout<<"testing CMS with PE not as single"<<endl; + cout<<"testing CMS with PE not as single strict kappa prime implementation"<<endl; testCMS_with_PE_NotasSingle(lambda,kappa,kappa_prime); + cout<<"testing CMS with PE not as single lenient kappa prime implementation"<<endl; + testCMS_with_PE_NotasSingle2(lambda,kappa,kappa_prime); cout<<"done"<<endl; // profiling_CMS_fill();