Skip to content
Snippets Groups Projects
Commit e8abb450 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

bugfix in the case of --PE-separated option.

parent 603a19d8
No related branches found
No related tags found
No related merge requests found
dnl Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
AC_INIT(rock, 1.5)
AC_INIT(rock, 1.5.1)
AC_CANONICAL_SYSTEM
......
......@@ -138,6 +138,7 @@ private:
inline int isRCovBelowThres(const T_read_numericValues& read_val, const int& threshold);
//inline int isRCovBelowThresAux(const readNumericValues& read_val, const int& threshold);
inline int isRCovBelowThresPartial(const readNumericValues& read_val, const int& threshold, const int& start=0,const int& stop=0);
void init(int glambda,int gkappa,int gkappa_prime,int filter_PE_as_single=1);
......@@ -191,9 +192,49 @@ public:
}
};
/*
* Computes median of occurences for all the k_mers in a single read or for a part of a PE read.
*/
template<typename T> inline int CountMinSketch<T>::isRCovBelowThresPartial(const readNumericValues& read_val, const int& threshold, const int& start,const int& stop) {
int a=0,b=0;
int j,h;
T min;
readNumericValues::const_iterator it;
readNumericValues::const_iterator it_aux;
if (stop) it_aux=read_val.begin()+stop;
else it_aux=read_val.end();
for (it=read_val.begin()+start;it!=it_aux;++it) {
j=lambda;
min=mask;
while (--j>=0 && min>threshold) {
h=hash64to32(*it,j);
min=cms_lambda_array[j] [h];
}
(min<threshold)? a+=1:a;
++b;
}
return (2*a>b);
}
/*
* Determines if read (single or PE) has a median coverage below a given threshold
*/
template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const T_read_numericValues& read_val, const int& threshold) {
int PE1_below_thres;
int PE2_below_thres=0;
PE1_below_thres=isRCovBelowThresPartial(read_val.single_or_PE_val,threshold,0,read_val.idx_start_PE2);
if (read_val.idx_start_PE2 && !filter_PE_as_single) {
PE2_below_thres=isRCovBelowThresPartial(read_val.single_or_PE_val,threshold,read_val.idx_start_PE2,0);
if (threshold==kappa) return (PE1_below_thres || PE2_below_thres);
else return(PE1_below_thres && PE2_below_thres);
}
return PE1_below_thres;
}
/*
template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const T_read_numericValues& read_val, const int& threshold) {
int PE1_below_thres;
int PE2_below_thres=0;
......@@ -216,6 +257,8 @@ template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const T_read
}
PE1_below_thres=(2*a>b);
if (read_val.idx_start_PE2 && !filter_PE_as_single) {
a=0;
b=0;
for (it=it_aux;it!=read_val.single_or_PE_val.end();++it) { // idx_start_PE2 is 0 if PE are treated as single or in the case of single reads.
j=lambda;
min=mask;
......@@ -230,6 +273,7 @@ template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const T_read
}
return PE1_below_thres||PE2_below_thres;
}
*/
template<typename T> void CountMinSketch<T>::init(int glambda,int gkappa,int gkappa_prime,int gfilter_PE_as_single) {
lambda=glambda;
......
......@@ -72,7 +72,7 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
CountMinSketch<unsigned char>* cms=new CountMinSketch<unsigned char>(lambda,kappa,kappa_prime,filter_PE_as_single);
int i;
cout<<"size of the CMS component: "<<sizeof(cms)<<endl;
int num=100*lambda;
int num=200;
//int rej_expected=0;
int ret;
for (i=0;i<kappa;i++) {
......@@ -83,19 +83,33 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
// mimic a PE read (as given by part 2 output).
T_read_numericValues read_values;
// mimic a PE read in which PE1 would cntain a majority of k_mers that are not already in the CMS and PE2 would contain a majority of k_mers that are already in the CMS.
// mimic a PE read in which PE1 contains k-mer that appear rarely in the CMS and PE2 contains k_mers that already appear kappa times in the CMS.
for (i=1;i<=200;i++) {
read_values.single_or_PE_val.push_back(i*1000-1);
}
read_values.idx_start_PE2=199;
read_values.idx_start_PE2=200;
for (i=1;i<=239;i++) {
read_values.single_or_PE_val.push_back(num);
}
read_values.single_or_PE_val.push_back(50);
read_values.single_or_PE_val.push_back(55);
ret=cms->addRead(read_values); // read should be accepted since none of the values that it contains is equal to those I inserted previously.
ret=cms->addRead(read_values); // read should be accepted since PE1 is nor over covered.
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since only PE1 is under covered.
assert(ret==0);
// Now, change PE1 a little and insert it kappa times in the CMS
for (int j=1;j<kappa;j++) {
read_values.single_or_PE_val[2]=j;
ret=cms->addRead(read_values);
if (j<kappa-1) assert(ret>0); // read should be accepted as long as we haven't inserted PE1 kappa times.
else assert(ret>0); //otherwise it is rejected.
}
ret=cms->isBeneathMinKappa(read_values); // read should not be considered under covered since PE1 was inserted kappa times.
assert(ret==0);
// mimic a PE read (as given by part 2 output).
T_read_numericValues read_values2;
......@@ -111,12 +125,56 @@ void testCMS_with_PE_NotasSingle(int lambda,int kappa,int kappa_prime) {
ret=cms->addRead(read_values2);
assert(ret>0);
}
ret=cms->isBeneathMinKappa(read_values);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values2);
assert(ret==0);
//mimic a read for which PE1 is under kappa_prime and PE2 too. Such a read is expected to be removed by low filter
T_read_numericValues read_values3;
for (i=1;i<=200;i++) {
read_values3.single_or_PE_val.push_back(i);
}
read_values3.idx_start_PE2=201;
for (i=1;i<=205;i++) {
read_values3.single_or_PE_val.push_back(i);
}
ret=cms->addRead(read_values3);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values3);
assert(ret>0);
//mimic a read for which PE1 is over kappa_prime but not PE2
T_read_numericValues read_values4;
for (i=201;i<=399;i++) {
read_values4.single_or_PE_val.push_back(200);
}
read_values4.idx_start_PE2=199;
for (i=201;i<=399;i++) {
read_values4.single_or_PE_val.push_back(i);
}
ret=cms->addRead(read_values4);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values4);
assert(ret==0);
//mimic a read for which PE2 is over kappa_prime but not PE1
T_read_numericValues read_values5;
for (i=201;i<=399;i++) {
read_values5.single_or_PE_val.push_back(i);
}
read_values5.idx_start_PE2=199;
for (i=201;i<=399;i++) {
read_values5.single_or_PE_val.push_back(200);
}
ret=cms->addRead(read_values5);
assert(ret>0);
ret=cms->isBeneathMinKappa(read_values5);
assert(ret==0);
delete cms;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment