Skip to content
Snippets Groups Projects
Commit 600ae26d authored by Veronique Legrand's avatar Veronique Legrand
Browse files

added unit tests for main utils and fixed bug in computation of lambda and of collision probability

parent 9b015efd
No related branches found
No related tags found
No related merge requests found
......@@ -142,17 +142,32 @@ int processInOutFileArgs(const std::string& input_file,const std::string output_
int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max) {
int lambda=2;
int min_lambda=lambda;
float tmp=1/INT_MAX;
tmp=1-tmp;
long double tmp=1.0/INT_MAX;
tmp=1.0-tmp;
tmp=pow(tmp,nb_k_mers);
tmp=1-tmp;
tmp=1.0-tmp;
while (lambda<=lambda_max) {
min_lambda=lambda;
float p=pow(tmp,lambda);
long double p=pow(tmp,lambda);
if (p<=0.01) break;
lambda+=1;
}
return min_lambda;
}
/*
* Compute collision probability
*/
float getCollisionProba(const unsigned long& nb_k_mers,const int& lambda) {
long double tmp=1.0/INT_MAX;
tmp=1.0-tmp;
tmp=pow(tmp,nb_k_mers);
tmp=1.0-tmp;
long double p=pow(tmp,lambda);
if (p<0.0001) p=0;
// rounding
p=p*1000+0.5;
return floor(p)/1000;
//return p;
}
......@@ -17,6 +17,7 @@
unsigned long getNodePhysMemory();
int processInOutFileArgs(const std::string& input_file,const std::string output_file,std::vector<IO_fq_files>& single_files,std::vector<PE_files>& v_PE_files,int& f_id);
int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max);
float getCollisionProba(const unsigned long& nb_k_mers,const int& lambda);
#endif /* MAIN_UTILS_H_ */
......@@ -13,21 +13,7 @@
using namespace std;
#include "CountMinSketch.hpp"
//TODO Add tests for hash functions? Yes
/*
void test_hash(int lambda,int kappa,int kappa_prime) {
int j,i;
unsigned long intermediate=ULONG_MAX;
unsigned long expected=intermediate;
CountMinSketch cms=CountMinSketch(lambda,kappa,kappa_prime);
for (j=1;j<=lambda;j++) {
//assert(cms.hash64to32(0,j)==pi_j[j]);
}
}
*/
void test_CMS(int lambda,int kappa,int kappa_prime) {
CountMinSketch<unsigned char> cms(lambda,kappa,kappa_prime);
int i;
......@@ -43,6 +29,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) {
std::cout<<"min="<<min<<endl;
assert((min==num) || (min==num-get_mask<unsigned char>::value-1)); // addKmer doesn't check kappa.
unsigned long nb_distinct_k_mers=cms.getNbDistinctKMers();
assert(nb_distinct_k_mers==1);
// mimic a read (as given by part 2 output).
readNumericValues read_values;
for (i=1;i<=400;i++) {
......@@ -51,6 +40,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) {
ret=cms.addRead(read_values); // read should be accepted since none of the values that it contains is equal to those I inserted previously.
assert(ret!=rej_expected);
nb_distinct_k_mers=cms.getNbDistinctKMers();
assert(nb_distinct_k_mers==401);
// mimic a vector that contains a lot of k_mers already present in the CMS and check that it is rejected.
readNumericValues read_values2;
for (i=1;i<=199;i++) {
......@@ -61,6 +53,10 @@ void test_CMS(int lambda,int kappa,int kappa_prime) {
}
ret=cms.addRead(read_values2);
assert(ret==rej_expected);
nb_distinct_k_mers=cms.getNbDistinctKMers();
assert(nb_distinct_k_mers==401);
// test that reads with median cov<kappa_prime are rejected.
ret=cms.isBeneathMinKappa(read_values);
assert(ret>0);
......@@ -85,11 +81,5 @@ int main(int argc, char **argv) {
cout<<"testing CMS with lambda="<<lambda<<endl;
test_CMS(lambda,kappa,kappa_prime); // Finally using C arrays (maps implied storing hash keys : 4 Bytes per k_mer overhead) but each array is of size INT_MAX...
/*lambda=6;
cout<<"testing CMS with lambda="<<lambda<<endl;
test_CMS(lambda,kappa,kappa_prime);
lambda=8;
cout<<"testing CMS with lambda="<<lambda<<endl;
test_CMS(lambda,kappa,kappa_prime);*/
cout<<"done"<<endl;
}
......@@ -4,6 +4,6 @@ LOG_COMPILER = $(SHELL)
TESTS = rock.sh
EXTRA_DIST = $(TESTS) data/extract_klebsiella_long_reads_100.txt data/extract_klebsiella_long_reads_100_filtered.txt data/fastq.raw/klebsiella_100_1.fq data/fastq.raw/klebsiella_100_2.fq data/unit/klebsiella_PE1.fq data/unit/klebsiella_PE2.fq data/unit/test_PE1.fq data/unit/test_PE2.fq data/unit/test_PE1_2.fq data/unit/test_PE2_2.fq data/unit/test_single2.fq data/unit/test_single.fq
EXTRA_DIST = $(TESTS) data/extract_klebsiella_long_reads_100.txt data/extract_klebsiella_long_reads_100_filtered.txt data/fastq.raw/klebsiella_100_1.fq data/fastq.raw/klebsiella_100_2.fq data/unit/klebsiella_PE1.fq data/unit/klebsiella_PE2.fq data/unit/test_PE1.fq data/unit/test_PE2.fq data/unit/test_PE1_2.fq data/unit/test_PE2_2.fq data/unit/test_single2.fq data/unit/test_single.fq data/unit/list_input1.txt data/unit/list_output1.txt data/unit/list_input2.txt data/unit/list_output2.txt data/unit/list_input3.txt data/unit/list_output3.txt
......@@ -3,10 +3,12 @@
## Debug mode
test "x$VERBOSE" = "xx" && set -x
pwd
ls -l
[ -z "$srcdir" ] && srcdir="."
echo ${srcdir}
echo "srcdir"=${srcdir}
## doing some cleanup in case previous execution failed.
rm -fr data/fastq.filtered || echo "nothing to clean up"
## check options
echo " "
echo "##################################################################################"
......@@ -116,6 +118,10 @@ echo "##########################################################################
echo "unit testing for fqwriter"
../src/unit_test_fqwriter || exit 25
echo " "
echo "#################################################################################"
echo "unit testing for main utils"
../src/unit_test_main_utils || exit 26
# cleanup
echo " "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment