diff --git a/src/main_utils.cpp b/src/main_utils.cpp index ac6653d4a13923a3349c47170a4ceac0d0a10a9b..2f3e279b0345304859d7c87aa17610d77d566863 100644 --- a/src/main_utils.cpp +++ b/src/main_utils.cpp @@ -142,17 +142,32 @@ int processInOutFileArgs(const std::string& input_file,const std::string output_ int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max) { int lambda=2; int min_lambda=lambda; - float tmp=1/INT_MAX; - tmp=1-tmp; + long double tmp=1.0/INT_MAX; + tmp=1.0-tmp; tmp=pow(tmp,nb_k_mers); - tmp=1-tmp; + tmp=1.0-tmp; while (lambda<=lambda_max) { min_lambda=lambda; - float p=pow(tmp,lambda); + long double p=pow(tmp,lambda); if (p<=0.01) break; lambda+=1; } return min_lambda; } +/* + * Compute collision probability + */ +float getCollisionProba(const unsigned long& nb_k_mers,const int& lambda) { + long double tmp=1.0/INT_MAX; + tmp=1.0-tmp; + tmp=pow(tmp,nb_k_mers); + tmp=1.0-tmp; + long double p=pow(tmp,lambda); + if (p<0.0001) p=0; + // rounding + p=p*1000+0.5; + return floor(p)/1000; + //return p; +} diff --git a/src/main_utils.h b/src/main_utils.h index 80cd267c29f1a5960dd32b27faa1d80f92d99ffe..50ebafe8808ba05a4043234ef5c0e0d794fa73aa 100644 --- a/src/main_utils.h +++ b/src/main_utils.h @@ -17,6 +17,7 @@ unsigned long getNodePhysMemory(); int processInOutFileArgs(const std::string& input_file,const std::string output_file,std::vector<IO_fq_files>& single_files,std::vector<PE_files>& v_PE_files,int& f_id); int getBestLambdaForN(const unsigned long& nb_k_mers,const int& lambda_max); +float getCollisionProba(const unsigned long& nb_k_mers,const int& lambda); #endif /* MAIN_UTILS_H_ */ diff --git a/src/unit_test_cms.cpp b/src/unit_test_cms.cpp index 397ce29110472e78e9a675d17d7b0c8d4dd55a87..47349ceede31231c68da185a94b5375add5fcbae 100644 --- a/src/unit_test_cms.cpp +++ b/src/unit_test_cms.cpp @@ -13,21 +13,7 @@ using namespace std; #include "CountMinSketch.hpp" -//TODO Add tests for hash functions? Yes -/* -void test_hash(int lambda,int kappa,int kappa_prime) { - int j,i; - unsigned long intermediate=ULONG_MAX; - unsigned long expected=intermediate; - CountMinSketch cms=CountMinSketch(lambda,kappa,kappa_prime); - - for (j=1;j<=lambda;j++) { - //assert(cms.hash64to32(0,j)==pi_j[j]); - } -} - -*/ void test_CMS(int lambda,int kappa,int kappa_prime) { CountMinSketch<unsigned char> cms(lambda,kappa,kappa_prime); int i; @@ -43,6 +29,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { std::cout<<"min="<<min<<endl; assert((min==num) || (min==num-get_mask<unsigned char>::value-1)); // addKmer doesn't check kappa. + unsigned long nb_distinct_k_mers=cms.getNbDistinctKMers(); + assert(nb_distinct_k_mers==1); + // mimic a read (as given by part 2 output). readNumericValues read_values; for (i=1;i<=400;i++) { @@ -51,6 +40,9 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { ret=cms.addRead(read_values); // read should be accepted since none of the values that it contains is equal to those I inserted previously. assert(ret!=rej_expected); + nb_distinct_k_mers=cms.getNbDistinctKMers(); + assert(nb_distinct_k_mers==401); + // mimic a vector that contains a lot of k_mers already present in the CMS and check that it is rejected. readNumericValues read_values2; for (i=1;i<=199;i++) { @@ -61,6 +53,10 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { } ret=cms.addRead(read_values2); assert(ret==rej_expected); + + nb_distinct_k_mers=cms.getNbDistinctKMers(); + assert(nb_distinct_k_mers==401); + // test that reads with median cov<kappa_prime are rejected. ret=cms.isBeneathMinKappa(read_values); assert(ret>0); @@ -85,11 +81,5 @@ int main(int argc, char **argv) { cout<<"testing CMS with lambda="<<lambda<<endl; test_CMS(lambda,kappa,kappa_prime); // Finally using C arrays (maps implied storing hash keys : 4 Bytes per k_mer overhead) but each array is of size INT_MAX... - /*lambda=6; - cout<<"testing CMS with lambda="<<lambda<<endl; - test_CMS(lambda,kappa,kappa_prime); - lambda=8; - cout<<"testing CMS with lambda="<<lambda<<endl; - test_CMS(lambda,kappa,kappa_prime);*/ cout<<"done"<<endl; } diff --git a/test/Makefile.am b/test/Makefile.am index 9cb51099c068dd117b6bd85bdfe6ec45c13c33f7..7e30e82e5e33b6e2e900284b3662974ec4017afe 100755 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -4,6 +4,6 @@ LOG_COMPILER = $(SHELL) TESTS = rock.sh -EXTRA_DIST = $(TESTS) data/extract_klebsiella_long_reads_100.txt data/extract_klebsiella_long_reads_100_filtered.txt data/fastq.raw/klebsiella_100_1.fq data/fastq.raw/klebsiella_100_2.fq data/unit/klebsiella_PE1.fq data/unit/klebsiella_PE2.fq data/unit/test_PE1.fq data/unit/test_PE2.fq data/unit/test_PE1_2.fq data/unit/test_PE2_2.fq data/unit/test_single2.fq data/unit/test_single.fq +EXTRA_DIST = $(TESTS) data/extract_klebsiella_long_reads_100.txt data/extract_klebsiella_long_reads_100_filtered.txt data/fastq.raw/klebsiella_100_1.fq data/fastq.raw/klebsiella_100_2.fq data/unit/klebsiella_PE1.fq data/unit/klebsiella_PE2.fq data/unit/test_PE1.fq data/unit/test_PE2.fq data/unit/test_PE1_2.fq data/unit/test_PE2_2.fq data/unit/test_single2.fq data/unit/test_single.fq data/unit/list_input1.txt data/unit/list_output1.txt data/unit/list_input2.txt data/unit/list_output2.txt data/unit/list_input3.txt data/unit/list_output3.txt diff --git a/test/rock.sh b/test/rock.sh index 2c699ed229be244f6899cd5a393265f95d55c5b5..649fdbf13a3a9ee7504437faa790bf8d5f3588b3 100755 --- a/test/rock.sh +++ b/test/rock.sh @@ -3,10 +3,12 @@ ## Debug mode test "x$VERBOSE" = "xx" && set -x -pwd -ls -l [ -z "$srcdir" ] && srcdir="." -echo ${srcdir} +echo "srcdir"=${srcdir} + +## doing some cleanup in case previous execution failed. +rm -fr data/fastq.filtered || echo "nothing to clean up" + ## check options echo " " echo "##################################################################################" @@ -116,6 +118,10 @@ echo "########################################################################## echo "unit testing for fqwriter" ../src/unit_test_fqwriter || exit 25 +echo " " +echo "#################################################################################" +echo "unit testing for main utils" +../src/unit_test_main_utils || exit 26 # cleanup echo " "