diff --git a/configure.ac b/configure.ac index e2143e2c51a54c8233d96256c1efb1dbebefb5e6..647b9c0da8e54000fd2e1f6753dae39c7cea9ef6 100644 --- a/configure.ac +++ b/configure.ac @@ -1,10 +1,10 @@ dnl Process this file with autoconf to produce a configure script. -AC_PREREQ(2.59) -AC_INIT(rock, 1.9.6) +AC_PREREQ([2.69]) +AC_INIT([rock],[2.0]) -AC_CANONICAL_SYSTEM +AC_CANONICAL_TARGET AM_INIT_AUTOMAKE() # Checks for programs. diff --git a/src/CountMinSketch.hpp b/src/CountMinSketch.hpp index a8f07020e2925ebddddee1df94da858d6db77750..e2519005659971225590fcb9fb664c7699c31ffb 100644 --- a/src/CountMinSketch.hpp +++ b/src/CountMinSketch.hpp @@ -42,6 +42,7 @@ typedef struct { int kappa_prime; int max_filter_size; // max amount of RAM wanted for the CMS. float wanted_max_collision_proba; + int process_PE_separately; } CMSparams; template <typename T> @@ -80,7 +81,9 @@ private: } - inline int isRCovBelowThres(const readNumericValues& read_val, const int& threshold); + + + inline int isRCovBelowThres(const T_read_numericValues& read_val, const int& threshold); inline int isRCovBelowThresPartial(const readNumericValues& read_val, const int& threshold, const int& start=0,const int& stop=0); @@ -130,9 +133,9 @@ public: // keep that just for unit testing purpose T getEstimatedNbOcc(const unsigned long& val); - int addRead(const readNumericValues& read_val); + int addRead(const T_read_numericValues& read_val); - int isBeneathMinKappa(const readNumericValues&read_val); + int isBeneathMinKappa(const T_read_numericValues&read_val); unsigned long getNbDistinctKMers(); @@ -145,31 +148,54 @@ public: * Determines whether median of k-mer coverage is below threshold or not. */ // this is a few seconds faster -template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const readNumericValues& read_val, const int& threshold) { +template<typename T> inline int CountMinSketch<T>::isRCovBelowThres(const T_read_numericValues& read_val, const int& threshold) { int PE1_below_thres; + int PE2_below_thres=0; int a1=0,a2=0; - int b1; + int b1,b2=0; int j; unsigned int h; T min; - int num=read_val.size(); - b1=num; + int num=read_val.single_or_PE_val.size(); + + read_val.idx_start_PE2?b1=read_val.idx_start_PE2:b1=num; + read_val.idx_start_PE2?b2=num-b1:b2; // this proves faster than using iterators on these data (short) - long const * p_start=(long const * )&(read_val[0]); // TODO: when we'll use C++11: use the data() method of the vector class to access the underlying C array. + long const * p_start=(long const * )&(read_val.single_or_PE_val[0]); // TODO: when we'll use C++11: use the data() method of the vector class to access the underlying C array. int nb=0; while(nb<b1 && 2*a1<=b1) { j=lambda; min=mask; while (--j>=0 && min>threshold) { + //std::cout<<*(p_start+nb)<<std::endl; + //std::cout<<j<<std::endl; h=hash64to32(*(p_start+nb),j); + //std::cout<<h<<std::endl; + //unsigned int test=hash64to32(657922856560023,0); + min=cms_lambda_array[j] [h]; } (min<threshold)? a1+=1:a1; nb++; } PE1_below_thres=2*a1>b1; + if (b2) { + nb=b1; + while(nb<num && 2*a2<=b2) { + j=lambda; + min=mask; + while (--j>=0 && min>threshold) { + h=hash64to32(*(p_start+nb),j); + min=cms_lambda_array[j] [h]; + } + (min<threshold)? a2+=1:a2; + nb++; + } + PE2_below_thres=2*a2>b2; + return (PE1_below_thres || PE2_below_thres); + } return PE1_below_thres; } @@ -252,14 +278,14 @@ template<typename T> T CountMinSketch<T>::getEstimatedNbOcc(const unsigned long& return min; } -template<typename T> int CountMinSketch<T>::addRead(const readNumericValues& read_val) { +template<typename T> int CountMinSketch<T>::addRead(const T_read_numericValues& read_val) { int keep_r=isRCovBelowThres(read_val,kappa); if (keep_r) { readNumericValues::const_iterator it; // according to the intel profiler, we spend a lot of time in this loop. Try to use something slightly faster. - long const * p=(long const *) &read_val[0]; + long const * p=(long const *) &read_val.single_or_PE_val[0]; int cnt; - int stop=read_val.size(); + int stop=read_val.single_or_PE_val.size(); for (cnt=0;cnt<stop;cnt++) { this->addKMer(*p); p=p+1; @@ -268,7 +294,7 @@ template<typename T> int CountMinSketch<T>::addRead(const readNumericValues& rea return keep_r; } -template<typename T> int CountMinSketch<T>::isBeneathMinKappa(const readNumericValues& read_val) { +template<typename T> int CountMinSketch<T>::isBeneathMinKappa(const T_read_numericValues& read_val) { int res=isRCovBelowThres(read_val,kappa_prime); return res; } diff --git a/src/Filter.hpp b/src/Filter.hpp index 8537fa225ef2b7ce8afbf50e030516b1578691f3..943a28b098841899f53bbadfc250d266bf442892 100644 --- a/src/Filter.hpp +++ b/src/Filter.hpp @@ -49,8 +49,7 @@ class Filter { ShortCountMinSketch * pShortCMS; FasqQualThreshold qual_thres; - // long nb_bytes_before_fill_CMS,nb_bytes_after_fill_CMS; - //long nb_bytes_CMS; //! Number of bytes taken by the underlying CMS. + int process_PE_separately; void getRSS(); @@ -66,9 +65,7 @@ public: if (parms.kappa<ubytemask) pByteCMS=new ByteCountMinSketch(parms); else pShortCMS=new ShortCountMinSketch(parms); qual_thres=the_qual_thres; - //getRSS(); - //nb_bytes_CMS=0; - //nb_bytes_after_fill_CMS=0; + process_PE_separately=parms.process_PE_separately; } void fillCMS(FqBaseBackend* map_id_backend[],int nb_be,int k,srp* io_sr); @@ -85,7 +82,7 @@ template <typename T> void Filter::underlyingfillCMS(FqBaseBackend* map_id_backe i_dim::iterator it_offs; k_dim::iterator it_struct; ReadProcessor read_p(k); - readNumericValues nbrKmerDecompo; + T_read_numericValues nbrKmerDecompo; // 1rst step, open files before fetching reads in them int i; @@ -103,14 +100,14 @@ template <typename T> void Filter::underlyingfillCMS(FqBaseBackend* map_id_backe init_DnaSeqStr(&a_seqs[1]); getDNASeqstr(map_id_backend,*it_struct,j,a_seqs,qual_thres.nucl_score_threshold); // decompose dna string into k-mers and turn k_mers into numbers. - decomposeReadInKMerNums(read_p, nbrKmerDecompo,k,a_seqs); + decomposeReadInKMerNums(read_p, nbrKmerDecompo,k,a_seqs,process_PE_separately); // insert k-mers into CMS if overall read satisfies kappa condition. ret=cms->addRead(nbrKmerDecompo); if (!ret) { // read is rejected, update rpos structure accordingly. it_struct->fileid=0; } - nbrKmerDecompo.clear(); + nbrKmerDecompo.single_or_PE_val.clear(); } } } @@ -126,7 +123,7 @@ template <typename T> void Filter::underlyinglowFilterCMS(FqBaseBackend* map_id_ k_dim::iterator it_struct; int ret; ReadProcessor read_p(k); - readNumericValues nbrKmerDecompo; + T_read_numericValues nbrKmerDecompo; // 1rst step, open files before fetching reads in them int i; @@ -145,10 +142,10 @@ template <typename T> void Filter::underlyinglowFilterCMS(FqBaseBackend* map_id_ init_DnaSeqStr(&a_seqs[1]); getDNASeqstr(map_id_backend,*it_struct,j,a_seqs); // decompose dna string into k-mers and turn k_mers into numbers. - decomposeReadInKMerNums(read_p, nbrKmerDecompo,k,a_seqs); + decomposeReadInKMerNums(read_p, nbrKmerDecompo,k,a_seqs,process_PE_separately); ret=cms->isBeneathMinKappa(nbrKmerDecompo); if (ret) it_struct->fileid=0; - nbrKmerDecompo.clear(); + nbrKmerDecompo.single_or_PE_val.clear(); } } } diff --git a/src/FqMainBackend.cpp b/src/FqMainBackend.cpp index a259053d242b70435a27d96694ec8a555a51826d..c8cda45a566849523eb69ea9af5ed50204eeae94 100644 --- a/src/FqMainBackend.cpp +++ b/src/FqMainBackend.cpp @@ -71,9 +71,6 @@ void FqMainBackend::processFile(char * filename,unsigned char f_id) { size_t buffer_size; (test_mode)?buffer_size=test_bufsize:buffer_size=FqBaseBackend::bufsize; - //this->openUndefFile(); - //if (p_auxFqProcessor!=NULL) p_auxFqProcessor->openUndefFile(); - int f_single=open(filename,O_RDONLY,mode); if (f_single==-1) { err(errno,"cannot open file: %s.",filename); @@ -90,9 +87,6 @@ void FqMainBackend::processFile(char * filename,unsigned char f_id) { processBuf(buf_info,f_id,cur_offset); } close(f_single); - /*this->closeUndefFile(); - if (p_auxFqProcessor!=NULL) p_auxFqProcessor->closeUndefFile();*/ - free(buf); } @@ -117,14 +111,17 @@ void FqMainBackend::onEndFastqRecord(T_fq_rec_info& rec_info,const T_buf_info& b //debug_processBuf(on_record_end,bufinfo,rec_info.rstart_offset); int nb_k_mer=rec_info.nb_nucleotides_in_read+1-qual_thres.k; rec_info.nb_nucleotides_in_read_PE2>0?nb_k_mer_PE2=rec_info.nb_nucleotides_in_read_PE2+1-qual_thres.k:nb_k_mer_PE2=0; - if (!treat_PE_separately || (p_auxFqProcessor==NULL)) { + if (p_auxFqProcessor==NULL) { //case of single reads nb_k_mer+=nb_k_mer_PE2; //(nb_k_mer-rec_info.nb_k_mers_in_error-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read)?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); if (nb_k_mer-rec_info.nb_k_mers_in_error-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read) ref_k_dim.push_back(rp); - } else { + } else { // PE reads are processed separately by default // ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=1) && (nb_k_mer-rec_info.nb_k_mers_in_error>=1))?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); // ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read) && (nb_k_mer-rec_info.nb_k_mers_in_error>=qual_thres.min_correct_k_mers_in_read))?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); - if ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read) && (nb_k_mer-rec_info.nb_k_mers_in_error>=qual_thres.min_correct_k_mers_in_read)) ref_k_dim.push_back(rp); + //if ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read) && (nb_k_mer-rec_info.nb_k_mers_in_error>=qual_thres.min_correct_k_mers_in_read)) ref_k_dim.push_back(rp); + //cout<<"nb_k_mer_PE2="<<nb_k_mer_PE2<<" nb_k_mer="<<nb_k_mer<<endl; + //cout<<" nb_k_mers_in_error_in_PE2="<<rec_info.nb_k_mers_in_error_in_PE2<<" nb_k_mers_in_error="<<rec_info.nb_k_mers_in_error<<endl; + if ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2+nb_k_mer-rec_info.nb_k_mers_in_error>=qual_thres.min_correct_k_mers_in_read)) ref_k_dim.push_back(rp); } // empty buffer keeping current record cur_fq_record[0]='\0'; diff --git a/src/FqMainBackend.h b/src/FqMainBackend.h index aad846ba68dc96856629d139c1833559c2e9515f..050f1c90a4cdb126345bf0358414ed18a3208b95 100644 --- a/src/FqMainBackend.h +++ b/src/FqMainBackend.h @@ -32,7 +32,7 @@ class FqMainBackend : public FqBaseBackend { // static int treat_PE_as_single; - static int treat_PE_separately; + //static int treat_PE_separately; unsigned long cnt_k_mers; //! counter for the total number of k-mers found in a single file or in a pair of PE files. //! This includes k-mers in error. FqAuxBackend * p_auxFqProcessor; /* Another fastq processor component is necessary for handling the case of PE reads.*/ @@ -56,9 +56,9 @@ public: void processBuf(T_buf_info&,unsigned char f_id,unsigned long cur_offset); - static void setTreatPEMode(const int& treat_PE){ + /*static void setTreatPEMode(const int& treat_PE){ FqMainBackend::treat_PE_separately=treat_PE; - } + }*/ unsigned long getCntKMers() { return cnt_k_mers; diff --git a/src/ROCKparams.cpp b/src/ROCKparams.cpp index 680bb6e1c74c400847e801569284b803906452fe..79f0ed99d97566dacae01e798ba62422d3b7e43b 100755 --- a/src/ROCKparams.cpp +++ b/src/ROCKparams.cpp @@ -41,7 +41,7 @@ const int ROCKparams::undef_ext; void usage(int status) { cout<<endl; - cout<<"ROCK 1.9.6 Copyright (C) 2016-2021 Institut Pasteur"<<endl; + cout<<"ROCK 2.0 Copyright (C) 2016-2022 Institut Pasteur"<<endl; cout<<endl; cout<<"Reducing Over-Covering K-mers within FASTQ file(s)"<<endl; cout<<endl; @@ -65,6 +65,8 @@ void usage(int status) { cout<<" -f <float> maximum expected false positive probability when computing"<<endl; cout<<" the optimal number of hashing functions from the number of"<<endl; cout<<" distinct k-mers specified with option -n (default: 0.05)."<<endl; + cout<<" -p process PE reads separately. This allows the selection of "<<endl; + cout<<" more reads which in some cases gives better assembly results."<<endl; cout<<" -q <int> sets as valid only k-mers made up of nucleotides with"<<endl; cout<<" Phred score (+33 offset) above this cutoff (default: 0)"<<endl; cout<<" -m <int> minimum number of valid k-mer(s) to consider a read;"<<endl; @@ -324,7 +326,7 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { // static int PE_separately=1; float proba=k_max_collision_proba; - while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vhq:m:f:")) != -1) { + while((i = getopt(argc, argv, "i:o:l:k:c:C:n:vhpq:m:f:")) != -1) { switch(i) { case 0: break; @@ -369,6 +371,9 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { char * t; //cout<<optarg<<endl; nb_k_mers=strtoul(optarg,&t,10);break; // number of distinct k-mers + case 'p': + parms.process_PE_separately=1; + break; case 'v': verbose_mode=1; break; diff --git a/src/ROCKparams.h b/src/ROCKparams.h index bea282b6eb4f6fe9b215ba7898f0b6daf9a30f9f..31f727cf6e6a6350c5d07fa0c8ebc3a4fd998fe2 100755 --- a/src/ROCKparams.h +++ b/src/ROCKparams.h @@ -70,6 +70,7 @@ class ROCKparams{ unsigned long nb_k_mers; // expected number of k-mers in input data if specified by the user. int k; // size of the k-mers int verbose_mode; + // int process_PE_separately; std::string input_file,output_file; std::vector<IO_fq_files> single_files; @@ -119,6 +120,7 @@ public: qual_thres.nucl_score_threshold=k_phred_32; qual_thres.k=k; verbose_mode=0; + parms.process_PE_separately=0; cms_size=0; expected_collision_proba=0.0; //! collision probability that is computed at the beginning of ROCK from the expected number of distinct k-mers provided by the user. parms.max_filter_size=getNodePhysMemory()/100.0*90-defaultGRPMAXSize; // Prefer not to use all the machine's memory diff --git a/src/ReadProcessor.cpp b/src/ReadProcessor.cpp index dde661758b9606f3f50919c0342cb5279cc885af..66bec291bbbf5897f6cdebff8c01c5f4df57eae3 100644 --- a/src/ReadProcessor.cpp +++ b/src/ReadProcessor.cpp @@ -59,17 +59,17 @@ void ReadProcessor::kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers) { -void ReadProcessor::getKMerNumbers(char * dnaStr,int l,std::vector<unsigned long>& my_vect) { // See simple_test.cpp and results. benchmark showed that std::vector is very slightly faster than C array and doesn't require more memory in our case. So, I am using it since it makes code simpler. +void ReadProcessor::getKMerNumbers(char * dnaStr,int l,readNumericValues& all_values) { // See simple_test.cpp and results. benchmark showed that std::vector is very slightly faster than C array and doesn't require more memory in our case. So, I am using it since it makes code simpler. int i; KmRevNumbers numbers; initNumbers(numbers); char * p_char=dnaStr; int nb_k_m=l-k+1; for (i=0; i<nb_k_m;i++) { - kMerAndRevToNumber(p_char,numbers); - if (n<0) { - (numbers.nbr<numbers.nbr_rev)?my_vect.push_back(numbers.nbr_rev):my_vect.push_back(numbers.nbr); // finding a k-mer is equivalent to finding its reverse complement; as numeric values for k-mers are unique, just take into account the biggest of the 2. - } + kMerAndRevToNumber(p_char,numbers); + if (n<0) { + (numbers.nbr<numbers.nbr_rev)?all_values.push_back(numbers.nbr_rev):all_values.push_back(numbers.nbr); // finding a k-mer is equivalent to finding its reverse complement; as numeric values for k-mers are unique, just take into account the biggest of the 2. + } p_char++; } } diff --git a/src/ReadProcessor.h b/src/ReadProcessor.h index 8caa552c8da2795bf4f864211d853010ffa36c4d..f3396cd601193db7690331dfe4178ac8ec5f8fbb 100644 --- a/src/ReadProcessor.h +++ b/src/ReadProcessor.h @@ -33,6 +33,7 @@ #include <iostream> +#include "rock_commons.h" using namespace std; @@ -132,7 +133,7 @@ public: void kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers); unsigned long nucleoToNumberWrap(char s); - void getKMerNumbers(char * dnaStr,int l,std::vector<unsigned long>& my_vect); + void getKMerNumbers(char * dnaStr,int l,readNumericValues&); }; diff --git a/src/fqreader.cpp b/src/fqreader.cpp index 80f7fdead21a23113ce65ffe9bfe60c98b4fab89..0a0a3a4a1e39762954d0b7af4975c003bcd96ecc 100644 --- a/src/fqreader.cpp +++ b/src/fqreader.cpp @@ -38,7 +38,7 @@ FasqQualThreshold FqBaseBackend::qual_thres; -int FqMainBackend::treat_PE_separately; +//int FqMainBackend::treat_PE_separately; /* * Processes 1 file containing single reads. @@ -80,7 +80,7 @@ unsigned long processInputFiles(const std::vector<IO_fq_files>& single_files,\ unsigned long tot_nb_kmers=0; unsigned char f_id=1; FqBaseBackend::setQualThreshold(a_qual_thres); - FqMainBackend::setTreatPEMode(PE_process_mode); + //FqMainBackend::setTreatPEMode(PE_process_mode); std::vector<IO_fq_files>::const_iterator it_single; for (it_single=single_files.begin();it_single!=single_files.end();it_single++) { FqMainBackend * be_fq=new FqMainBackend(io_sr); diff --git a/src/read_utils.cpp b/src/read_utils.cpp index d223fca11d76d04df4c131365d4c942b71ad112f..5770ce6c8d724842fc43d09c1fe61332eb2ab47b 100644 --- a/src/read_utils.cpp +++ b/src/read_utils.cpp @@ -149,19 +149,22 @@ void getDNASeqstr(FqBaseBackend* fq_files_be [], } -void decomposeReadInKMerNums(ReadProcessor& read_p, readNumericValues& nbrKmerDecompo,int k,DnaSeqStr a_seqs[2]) { +void decomposeReadInKMerNums(ReadProcessor& read_p, T_read_numericValues& nbrKmerDecompo,int k,DnaSeqStr a_seqs[2], int process_PE_separately) { int nb_expected_k_mers,nb_expected_k_mers_PE1,nb_expected_k_mers_PE2=0; nb_expected_k_mers_PE1=a_seqs[0].length+1-k; + nbrKmerDecompo.idx_start_PE2=0; if (a_seqs[1].length) nb_expected_k_mers_PE2=a_seqs[1].length+1-k; nb_expected_k_mers=nb_expected_k_mers_PE1+nb_expected_k_mers_PE2; - nbrKmerDecompo.reserve(nb_expected_k_mers); + nbrKmerDecompo.single_or_PE_val.reserve(nb_expected_k_mers); + //nbrKmerDecompo.nb_kmer_PE1=nb_expected_k_mers_PE1; char * start_dna_str=a_seqs[0].fq_record_buf+a_seqs[0].start_idx; - read_p.getKMerNumbers(start_dna_str,a_seqs[0].length,nbrKmerDecompo); + read_p.getKMerNumbers(start_dna_str,a_seqs[0].length,nbrKmerDecompo.single_or_PE_val); if (a_seqs[1].length) { // case of PE reads + if (process_PE_separately) nbrKmerDecompo.idx_start_PE2=nb_expected_k_mers_PE1; start_dna_str=a_seqs[1].fq_record_buf+a_seqs[1].start_idx; - read_p.getKMerNumbers(start_dna_str,a_seqs[1].length,nbrKmerDecompo); + read_p.getKMerNumbers(start_dna_str,a_seqs[1].length,nbrKmerDecompo.single_or_PE_val); } } diff --git a/src/read_utils.h b/src/read_utils.h index 5bb504cbef4b7eefbb0006acae918977eaff4fb9..c9b6aecf0cbd607973d7b01d2d432bcc6da7f99e 100644 --- a/src/read_utils.h +++ b/src/read_utils.h @@ -36,6 +36,6 @@ void getDNASeqstr(FqBaseBackend* [], void init_DnaSeqStr(DnaSeqStr * dna_seq); -void decomposeReadInKMerNums(ReadProcessor& read_p, readNumericValues& nbrKmerDecompo,int k,DnaSeqStr a_seqs[2]); +void decomposeReadInKMerNums(ReadProcessor& read_p, T_read_numericValues& nbrKmerDecompo,int k,DnaSeqStr a_seqs[2],int process_PE_separately=0); #endif /* READ_UTILS_H_ */ diff --git a/src/rock_commons.h b/src/rock_commons.h index f4647d380d8084b9a5335651a010cb4ee646d2c9..cfaf09c82de9232b289e16ddadde12358f7f66d6 100644 --- a/src/rock_commons.h +++ b/src/rock_commons.h @@ -70,11 +70,13 @@ typedef struct { }PE_files; -typedef std::vector<unsigned long> readNumericValues; +typedef struct std::vector<unsigned long> readNumericValues; -/*typedef struct { +typedef struct { readNumericValues single_or_PE_val; -}T_read_numericValues;*/ + int idx_start_PE2; // In he case where PE reads are not treated as single; + // indicate at which index the single_or_PE_val array starts containing values for PE2 k-mers. +}T_read_numericValues; typedef struct { unsigned int nb_input_reads; diff --git a/src/unit_test_cms.cpp b/src/unit_test_cms.cpp index 21bee4ac7e459ce2e3c383679565f29c91d62c2c..8a78c553a838a5bc91a4c3471c5d4e1ed95a3ee3 100644 --- a/src/unit_test_cms.cpp +++ b/src/unit_test_cms.cpp @@ -141,9 +141,10 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { assert(nb_distinct_k_mers==1); // mimic a read (as given by part 2 output). - readNumericValues read_values; + T_read_numericValues read_values; + read_values.idx_start_PE2=0; for (i=1;i<=400;i++) { - read_values.push_back(i*1000-1); + read_values.single_or_PE_val.push_back(i*1000-1); } ret=cms.addRead(read_values); // read should be accepted since none of the values that it contains is equal to those I inserted previously. assert(ret!=rej_expected); @@ -151,15 +152,32 @@ void test_CMS(int lambda,int kappa,int kappa_prime) { nb_distinct_k_mers=cms.getNbDistinctKMers(); assert(nb_distinct_k_mers==401); - // mimic a vector that contains a lot of k_mers already present in the CMS and check that it is rejected. - readNumericValues read_values2; + // mimic a vector that contains a lot of k_mers already present in the CMS in its PE2 but not in its PE1 and check that it is kept. + T_read_numericValues read_values2; + read_values2.idx_start_PE2=200; for (i=1;i<=199;i++) { - read_values2.push_back(i*1000-1); + read_values2.single_or_PE_val.push_back(i*1000-1); } for (i=200;i<=400;i++) { - read_values2.push_back(num); + read_values2.single_or_PE_val.push_back(num); } ret=cms.addRead(read_values2); + assert(ret!=rej_expected); + + nb_distinct_k_mers=cms.getNbDistinctKMers(); + assert(nb_distinct_k_mers==401); + + //mimic a vector that contains in both its PE1 and PE2 parts a lot of kmers already in the CMS and check that it is rejected. + T_read_numericValues read_values3; + read_values3.idx_start_PE2=200; + for (i=1;i<=160;i++) { + read_values3.single_or_PE_val.push_back(num); + } + for (i=61;i<=99;i++) read_values3.single_or_PE_val.push_back(num-1); + for (i=200;i<=400;i++) { + read_values3.single_or_PE_val.push_back(num); + } + ret=cms.addRead(read_values3); assert(ret==rej_expected); nb_distinct_k_mers=cms.getNbDistinctKMers(); diff --git a/src/unit_test_fqreader.cpp b/src/unit_test_fqreader.cpp index 46c3df9a2e2125af790d7461c9437822cc336a96..9756c3262532f0e5e527b9777ad45c88fcb893aa 100644 --- a/src/unit_test_fqreader.cpp +++ b/src/unit_test_fqreader.cpp @@ -47,7 +47,7 @@ const int treat_PE_as_single=0; void test_processSingleFile() { srp sr; unsigned char f_id=1; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); processSingleFile((char *) "../test/data/unit/test_single.fq",f_id,&sr); srp::reverse_iterator rit; i_dim::iterator it_offs; @@ -92,7 +92,7 @@ void test_processSingleFileWithMQOption() { qual_thres.nucl_score_threshold=14+k_phred_32; qual_thres.min_correct_k_mers_in_read=100; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); FqMainBackend be_fq=FqMainBackend(&sr); //be_fq.setUndefFile((char *) "../test/data/unit/test_single.undef.fq"); @@ -170,7 +170,7 @@ void test_processPEfilesWithA() { unsigned char f_id4=4; srp sr; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); processPEFiles(fq_3_test, f_id3,fq_4_test, f_id4,&sr); srp::reverse_iterator rit; i_dim::iterator it_offs; @@ -227,7 +227,7 @@ void test_processPEFiles() { unsigned char f_id2=2; srp sr; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr); srp::reverse_iterator rit; i_dim::iterator it_offs; @@ -286,7 +286,7 @@ void aux_testPEFilesMQ(FasqQualThreshold qual_thres,int nb_expected_reads) { unsigned char f_id1=1; unsigned char f_id2=2; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); FqBaseBackend::setQualThreshold(qual_thres); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr); @@ -315,7 +315,7 @@ void test_processPEFilesWithMQOptions() { qual_thres.k=20; qual_thres.nucl_score_threshold=14+k_phred_32; qual_thres.min_correct_k_mers_in_read=76; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); aux_testPEFilesMQ(qual_thres,4); // last fq records contains only 77 correct k-mers. @@ -368,23 +368,17 @@ void test_processAllFiles() { unsigned char f_single=3; srp sr; - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr); processSingleFile(fq_single,f_single,&sr); check_processAllFilesResults(sr); } -void test_processPE_not_as_single() { +void test_processPE_nuclthreshold() { char * fq_1_test=(char *) "../test/data/unit/test_PE1_PE_not_as_single.fq"; char * fq_2_test=(char *) "../test/data/unit/test_PE2_PE_not_as_single.fq"; - char * fq_1_test_undef=(char *) "../test/data/unit/test_PE1_PE_not_as_single.undef.fastq"; - char * fq_2_test_undef=(char *) "../test/data/unit/test_PE2_PE_not_as_single.undef.fastq"; - - char * fq_1_expected_undef=(char *) "../test/data/unit/expected_PE1_PE_not_as_single.undef.fastq"; - char * fq_2_expected_undef=(char *) "../test/data/unit/expected_PE2_PE_not_as_single.undef.fastq"; - unsigned char f_id1=1; unsigned char f_id2=2; srp sr; @@ -395,7 +389,6 @@ void test_processPE_not_as_single() { qual_thres.nucl_score_threshold=2+k_phred_32; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(1); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,1000); srp::reverse_iterator rit; @@ -411,18 +404,13 @@ void test_processPE_not_as_single() { } } } - assert(cnt_read==1); - - /*assert(compareFilesLileByLine(fq_1_test_undef,fq_1_expected_undef)==0); - assert(compareFilesLileByLine(fq_2_test_undef,fq_2_expected_undef)==0); + assert(cnt_read==3); - assert(remove((char *) "../test/data/unit/test_PE1_PE_not_as_single.undef.fastq")==0); - assert(remove((char *) "../test/data/unit/test_PE2_PE_not_as_single.undef.fastq")==0);*/ } -void test_processPE_not_as_single2() { +void test_processPE_nuclthreshold2() { char * fq_1_test=(char *) "../test/data/unit/test_PE1_PE_not_as_single_pathological.fq"; char * fq_2_test=(char *) "../test/data/unit/test_PE2_PE_not_as_single_pathological.fq"; @@ -435,11 +423,10 @@ void test_processPE_not_as_single2() { FasqQualThreshold qual_thres; qual_thres.k=30; - qual_thres.min_correct_k_mers_in_read=1; + qual_thres.min_correct_k_mers_in_read=221; qual_thres.nucl_score_threshold=10+k_phred_32; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(1); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,1000); srp::reverse_iterator rit; @@ -448,20 +435,14 @@ void test_processPE_not_as_single2() { int cnt_read=0; for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). - //unsigned long score=rit->first; for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { cnt_read++; } } } - assert(cnt_read==0); - - /*assert(compareFilesLileByLine(fq_1_test_undef,fq_1_test)==0); - assert(compareFilesLileByLine(fq_2_test_undef,fq_2_test)==0); + assert(cnt_read==2); - assert(remove((char *) "../test/data/unit/test_PE1_PE_not_as_single_pathological.undef.fastq")==0); - assert(remove((char *) "../test/data/unit/test_PE2_PE_not_as_single_pathological.undef.fastq")==0);*/ } @@ -469,9 +450,6 @@ void test_processPE_not_as_singleWithMQ() { char * fq_1_test=(char *) "../test/data/unit/fake_PE1.fq"; char * fq_2_test=(char *) "../test/data/unit/fake_PE2.fq"; - char * fq_1_test_undef=(char *) "../test/data/unit/fake_PE1.undef.fastq"; - char * fq_2_test_undef=(char *) "../test/data/unit/fake_PE2.undef.fastq"; - unsigned char f_id1=1; unsigned char f_id2=2; srp sr; @@ -482,7 +460,6 @@ void test_processPE_not_as_singleWithMQ() { qual_thres.nucl_score_threshold=10+k_phred_32; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(1); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,1000); srp::reverse_iterator rit; @@ -491,7 +468,6 @@ void test_processPE_not_as_singleWithMQ() { int cnt_read=0; for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). - //unsigned long score=rit->first; for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { cnt_read++; @@ -499,27 +475,19 @@ void test_processPE_not_as_singleWithMQ() { } } assert(cnt_read==1); - /*struct stat fileStat; - stat(fq_1_test_undef, &fileStat); - assert(fileStat.st_size==0); - stat(fq_2_test_undef, &fileStat); - assert(fileStat.st_size==0);*/ sr.clear(); + cout<<"try with a different min_correct_k_mers_in_read=87"<<endl; qual_thres.k=30; - qual_thres.min_correct_k_mers_in_read=45; + qual_thres.min_correct_k_mers_in_read=87; qual_thres.nucl_score_threshold=15+k_phred_32; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(1); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,1000); - - cnt_read=0; for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). - //unsigned long score=rit->first; for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { cnt_read++; @@ -527,25 +495,21 @@ void test_processPE_not_as_singleWithMQ() { } } assert(cnt_read==0); - sr.clear(); - /*assert(compareFilesLileByLine(fq_1_test_undef,fq_1_test)==0); - assert(compareFilesLileByLine(fq_2_test_undef,fq_2_test)==0);*/ + cout<<"try with a different min_correct_k_mers_in_read=13"<<endl; qual_thres.k=30; qual_thres.min_correct_k_mers_in_read=13; qual_thres.nucl_score_threshold=18+k_phred_32; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(1); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,1000); cnt_read=0; for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). - //unsigned long score=rit->first; for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { cnt_read++; @@ -555,16 +519,6 @@ void test_processPE_not_as_singleWithMQ() { assert(cnt_read==1); sr.clear(); - - /*stat(fq_1_test_undef, &fileStat); - assert(fileStat.st_size==0); - stat(fq_2_test_undef, &fileStat); - assert(fileStat.st_size==0); - - - assert(remove((char *) fq_1_test_undef)==0); - assert(remove((char *) fq_2_test_undef)==0);*/ - } @@ -591,7 +545,7 @@ void test_processInputFiles() { default_qual_thres.min_correct_k_mers_in_read=0; // aim of that test is not to check undef file creation. Disable it by putting 0 here default_qual_thres.nucl_score_threshold=0; // leave default values for that test - FqMainBackend::setTreatPEMode(treat_PE_as_single); + //FqMainBackend::setTreatPEMode(treat_PE_as_single); FqBaseBackend * array_be[k_max_input_files]; processInputFiles(v_single,v_pe,array_be,default_qual_thres,&sr,1); @@ -638,7 +592,7 @@ AAAAAEEAEEEEEEEEE6EE/EEEEEEEEAEEAEEEEEEEEEEEEEEAEEEA/A/EEEAEEEEEE/EE</EAEEEEEE/E qual_thres.min_correct_k_mers_in_read=78; T_buf_info buf_info; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(treat_PE_as_single); + //FqMainBackend::setTreatPEMode(treat_PE_as_single); FqMainBackend be(&sr); buf_info.buf=buf; buf_info.pchar=buf; @@ -680,7 +634,7 @@ AAAAAEEEEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEE<EEEAEEEEA<EAEEEEEEEAAAAE<EEEE/EEEEAAEEE qual_thres.nucl_score_threshold=k_phred_32; qual_thres.k=25; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(treat_PE_as_single); + //FqMainBackend::setTreatPEMode(treat_PE_as_single); FqMainBackend be(&sr); buf_info.buf=buf; buf_info.pchar=buf; @@ -751,7 +705,7 @@ AAAAAEEEEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEE<EEEAEEEEA<EAEEEEEEEAAAAE<EEEE/EEEEAAEEE qual_thres.nucl_score_threshold=k_phred_32; qual_thres.k=25; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(treat_PE_as_single); + //FqMainBackend::setTreatPEMode(treat_PE_as_single); FqMainBackend be(&sr); FqAuxBackend be2; @@ -820,7 +774,7 @@ AAAAAEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEAEEEEEEEEEEEE T_buf_info buf_info; T_buf_info PE2_buf_info; FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(0); + //FqMainBackend::setTreatPEMode(0); FqMainBackend be(&sr); @@ -861,18 +815,11 @@ void Aux_MimicBigPEFilesWithMQOptions(const FasqQualThreshold& qual_thres,const char * fq_1_test_undef=(char *) "../test/data/unit/09-4607_S43_R1.undef.fastq"; char * fq_2_test_undef=(char *) "../test/data/unit/09-4607_S43_R2.undef.fastq"; - char * fq_1_expected_undef_100=(char *) "../test/data/unit/thres_100_09-4607_S43_R1.undef.fastq"; - char * fq_2_expected_undef_100=(char *) "../test/data/unit/thres_100_09-4607_S43_R2.undef.fastq"; - - char * fq_1_expected_undef_180=(char *) "../test/data/unit/thres_180_09-4607_S43_R1.undef.fastq"; - char * fq_2_expected_undef_180=(char *) "../test/data/unit/thres_180_09-4607_S43_R2.undef.fastq"; - unsigned char f_id1=1; unsigned char f_id2=2; - // FqBaseBackend::bufsize=bufsize; // choose a very small buf size to mimic behavior on big files and reun the test in a short time + FqBaseBackend::setQualThreshold(qual_thres); - FqMainBackend::setTreatPEMode(0); processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,bufsize); @@ -937,11 +884,11 @@ int main(int argc, char **argv) { test_processPEFilesWithMQOptions(); cout<<"test processing files with thresholds (nucleotide quality score and minimum number of valid k_mers in read) mimic big files that need several read operations. "<<endl; test_MimicBigPEFilesWithMQOptions(); - cout<<"test processing PE files with nucleotide quality threshold and PE not treated as single option."<<endl; - test_processPE_not_as_single(); - cout<<"test processing PE files with lots of errors in PE2; nucleotide quality threshold and PE not treated as single option."<<endl; - test_processPE_not_as_single2(); - cout<<"test processing PE files with PE not treated as single; quality score threshold and minimum number of valid k-mer."<<endl; + cout<<"test processing PE files with nucleotide quality threshold."<<endl; + test_processPE_nuclthreshold(); + cout<<"test processing PE files with lots of errors in PE2; nucleotide quality threshold"<<endl; + test_processPE_nuclthreshold2(); + cout<<"test processing PE files; quality score threshold and minimum number of valid k-mer."<<endl; test_processPE_not_as_singleWithMQ(); cout<<"done"<<endl; } diff --git a/src/unit_test_read_utils.cpp b/src/unit_test_read_utils.cpp index 44560b997ed5b4905bf5c3bb64927f946d2a90e1..d510415f1a87bf3489aa2472db0240452dec1fef 100644 --- a/src/unit_test_read_utils.cpp +++ b/src/unit_test_read_utils.cpp @@ -42,8 +42,6 @@ void test_getReadSingle() { my_struct1=init_rpos(4,639); my_struct2=init_rpos(4,1228); - //std::cout<<my_struct1.read_a1<<endl; - assert(my_struct1.read_a1==639); assert(my_struct2.read_a1==1228); srp io_sr; @@ -147,7 +145,6 @@ void test_getReadPE() { be_fq1.closeInputFile(); be_fq2.closeFile(); - //cout<<"done"<<endl; } void test_getReadPEWithNQST() { @@ -191,19 +188,48 @@ void test_getReadPEWithNQST() { std::cout<<dna_read<<endl; assert(strcmp(dna_read,"GGNTCTGTTGGCTCAAACGCCNTCACNTCNTTGNTCAAAAGCTTATAATGCGNGCCAAANTCCGCCATCGNGACNNCTACGCCTTCCCCNGCTNTCCCGNCAAANNCNNGTCTTGCCGGANCTTCNCNNNCTCCNNTCGAAAGCGGCGAAATCTTAGNGGAAGGNGGANATAATGCNNTCNCNTCGNACNNTGAANNTNTANNCGGCANNCNGCAGNNTCNAGGNCTTNCNGNGNAACGNTTAANNGCAGATGGCTACGGNTTTGCNGGGGNANGAGACNGGANANCNNNGNCGNTCGNCCN")==0); - /*getDnaStr(fic_map,my_struct2,a_seqs,dna_read); - assert(strcmp(dna_read,"ATTGTGGGGTTCCTTTTTGTAGCATTGGAATGGAAATTAAAAATGGGGCTTCAGGATGCCCGCTCCATTATTTAATTCCAGAATGTAACGATGCTGTTTACCGGGGGGACTGGAAAGATGCACTTGAGCTTTTGATTAAAACAAATAACATGCCCAGAACCAATCACTGCAATTTTTTTATCCCACCGCACTATCGGTGGAGTCGGCATGAACCAACCTAAACCAAACCCACGGTCAATAATAGCCCGTTCAATCGAATTAATACCCACAGCAGGATCAGAAATTGCAACCGTACAACTTC")==0); -*/ + be_fq1.closeInputFile(); be_fq2.closeFile(); - //cout<<"done"<<endl; } void test_decomposeReadInkMerNums() { int k=5; ReadProcessor read_p(k); - readNumericValues nbrKmerDecompo; + int processPE_separately=1; + T_read_numericValues nbrKmerDecompo; + + DnaSeqStr a_seqs[2]; + init_DnaSeqStr(&a_seqs[0]); + init_DnaSeqStr(&a_seqs[1]); + DnaSeqStr * seq1=a_seqs; + + strcpy(seq1->fq_record_buf,"@fake_stuff\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+\n....@\n"); + seq1->start_idx=12; + seq1->length=75; + + DnaSeqStr * seq2=&a_seqs[1]; + strcpy(seq2->fq_record_buf,"@another_fake_stuff\nATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG\n+\n....@\n"); + seq2->start_idx=20; + seq2->length=80; + decomposeReadInKMerNums(read_p,nbrKmerDecompo,k,a_seqs,processPE_separately); + assert(nbrKmerDecompo.single_or_PE_val.size()==71+76); + assert(nbrKmerDecompo.idx_start_PE2==71); + readNumericValues::iterator it; + readNumericValues::iterator it_end_PE1=nbrKmerDecompo.single_or_PE_val.begin()+71; + for (it=nbrKmerDecompo.single_or_PE_val.begin();it!=it_end_PE1;it++) { + assert(*it==1023); + } + + nbrKmerDecompo.single_or_PE_val.clear(); +} + +void test_decomposeReadInkMerNums_PEAsSingle() { + int k=5; + ReadProcessor read_p(k); + T_read_numericValues nbrKmerDecompo; + DnaSeqStr a_seqs[2]; init_DnaSeqStr(&a_seqs[0]); init_DnaSeqStr(&a_seqs[1]); @@ -213,25 +239,21 @@ void test_decomposeReadInkMerNums() { seq1->start_idx=12; seq1->length=75; decomposeReadInKMerNums(read_p,nbrKmerDecompo,k,a_seqs); - assert(nbrKmerDecompo.size()==71); + assert(nbrKmerDecompo.single_or_PE_val.size()==71); + assert(nbrKmerDecompo.idx_start_PE2==0); readNumericValues::iterator it; - for (it=nbrKmerDecompo.begin();it!=nbrKmerDecompo.end();it++) { + for (it=nbrKmerDecompo.single_or_PE_val.begin();it!=nbrKmerDecompo.single_or_PE_val.end();it++) { assert(*it==1023); } DnaSeqStr * seq2=&a_seqs[1]; strcpy(seq2->fq_record_buf,"@another_fake_stuff\nATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG\n+\n....@\n"); seq2->start_idx=20; seq2->length=80; - nbrKmerDecompo.clear(); + nbrKmerDecompo.single_or_PE_val.clear(); decomposeReadInKMerNums(read_p,nbrKmerDecompo,k,a_seqs); - assert(nbrKmerDecompo.size()==147); + assert(nbrKmerDecompo.single_or_PE_val.size()==147); } -/* -void initNumbers(KmRevNumbers& numbers) { - numbers.first=1; - numbers.nbr=0; - numbers.nbr_rev=0; -}*/ + void savNumbers(ReadProcessor::KmRevNumbers& numbers,ReadProcessor::KmRevNumbers& numbers_sav) { numbers_sav.first=numbers.first; @@ -384,12 +406,9 @@ void testDNAToNumberMoreComplex() { // TODO in main, check that k<=32. // k-mer number 10 is GCTACCATAACACCAACTGTTTTCACCATA // its reverse complement is : TATGGTGAAAACAGTTGGTGTTATGGTAGC if (cnt_k_mer==10) { - //num=read_p.kMerToNumber((char *) "GCTACCATAACACCAACTGTTTTCACCATA",NULL); assert(*it==max(704021989794238796,930978566989888201)); } - /*if (cnt_k_mer==20) { - assert(*it==930978566989888201); - }*/ + } } @@ -488,6 +507,7 @@ int main(int argc, char **argv) { testDNAToNumberWithN(); cout<<"testing higher level function: decomposeReadInKMerNums"<<endl; test_decomposeReadInkMerNums(); + test_decomposeReadInkMerNums_PEAsSingle(); cout<<"done"<<endl; } diff --git a/test/rock.sh b/test/rock.sh index 861642e3a6b70c9dbff7961f476ca5e0a6e27894..e7a4b8db595213cd0f60e451b4ee4a7ee554164c 100755 --- a/test/rock.sh +++ b/test/rock.sh @@ -67,7 +67,7 @@ echo "doing more checks on options and error messages" ../src/rock -C 500 -c 400 -k 10 -l 12 -n 85000000 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt|grep "options are mutually exclusive">/dev/null || exit 14 ../src/rock -l 1 -C 500 -c 400 -k 10 -q 3 -m 0 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "minimum number of valid k-mer for keeping a read must be an integer">/dev/null || exit 15 ../src/rock -l 1 -C 500 -c 400 -k 10 -q -1 -m 2 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "q must be">/dev/null || exit 16 -#../src/rock -C 500 -c 400 -k 10 -q 4 -m 2 -p 10 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "value for -p option must be 0, 1 or 2">/dev/null || exit 17 +#../src/rock -C 500 -c 400 -k 10 -q 4 -m 2 -p 10 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "p option is a flag. You cannot provide a value for it">/dev/null || exit 17 #../src/rock -C 500 -c 400 -k 10 -q 4 -m 2 -p 1,5 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "value for -p option must be 0, 1 or 2">/dev/null || exit 18 #../src/rock -C 500 -c 400 -k 10 -q 4 -m 2 -p -1 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "value for -p option must be 0, 1 or 2">/dev/null || exit 17 diff --git a/test/rock_mem.sh b/test/rock_mem.sh index 48a52ad6331b31e1d4be1e83317f6853e460ed66..3c42eae441cfa2c32cf7e3a263610fccab6ea1f7 100755 --- a/test/rock_mem.sh +++ b/test/rock_mem.sh @@ -7,10 +7,10 @@ echo "srcdir"=${srcdir} ## Test high filter echo " " echo "##################################################################################" -echo "testing high filter" +echo "testing high filter with PE processed separately" -../src/rock -C 100 -c 0 -l 2 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 101 +../src/rock -C 100 -c 0 -l 2 -p -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 101 # output files should be the same size, contain the same elements but not in the same order. nb_PE1=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_1.fq` @@ -26,11 +26,29 @@ nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` test $nb_PE1 -eq $nb_PE1_filtered || exit 104 test $nb_PE2 -eq $nb_PE2_filtered || exit 105 +echo "testing high filter with PE processed as single" +../src/rock -C 100 -c 0 -l 2 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 101 + +# output files should be the same size, contain the same elements but not in the same order. +nb_PE1=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_1.fq` +nb_PE2=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_2.fq` + + +[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 106 +[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 107 + +nb_PE1_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_1.filtered.fastq` +nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` + +test $nb_PE1 -eq $nb_PE1_filtered || exit 108 +test $nb_PE2 -eq $nb_PE2_filtered || exit 109 + + echo " " echo "##################################################################################" -echo "testing high filter and -f option" +echo "testing high filter and -f option and PE processed separately" -../src/rock -C 100 -c 0 -l 1 -f 0.05 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 301 +../src/rock -C 100 -c 0 -l 1 -f 0.05 -p -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 110 # output files should be the same size, contain the same elements but not in the same order. nb_PE1=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_1.fq` @@ -38,57 +56,83 @@ nb_PE2=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_2.fq` -[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 302 -[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 303 +[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 111 +[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 112 nb_PE1_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_1.filtered.fastq` nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` -test $nb_PE1 -eq $nb_PE1_filtered || exit 304 -test $nb_PE2 -eq $nb_PE2_filtered || exit 305 +test $nb_PE1 -eq $nb_PE1_filtered || exit 112 +test $nb_PE2 -eq $nb_PE2_filtered || exit 114 + +echo "testing high filter and -f option and PE processed as single" + +../src/rock -C 100 -c 0 -l 1 -f 0.05 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 115 + +# output files should be the same size, contain the same elements but not in the same order. +nb_PE1=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_1.fq` +nb_PE2=`grep -c "@" ${srcdir}/data/fastq.raw/klebsiella_100_2.fq` + + +[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 116 +[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 117 + +nb_PE1_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_1.filtered.fastq` +nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` + +test $nb_PE1 -eq $nb_PE1_filtered || exit 118 +test $nb_PE2 -eq $nb_PE2_filtered || exit 119 # test low filter. echo " " echo "##################################################################################" -echo "testing low filter" -../src/rock -C 100 -c 99 -l 2 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 106 +echo "testing low filter with PE processed separately" +../src/rock -C 100 -c 99 -l 2 -p -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 120 + +[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 121 +[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 122 + +nb_PE1_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_1.filtered.fastq` +nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` + +test $nb_PE1_filtered -eq 0 || exit 123 +test $nb_PE2_filtered -eq 0 || exit 124 + +echo "testing low filter with PE processed as single" +../src/rock -C 100 -c 99 -l 2 -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt >/dev/null || exit 125 -[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 107 -[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 108 +[ -f "data/fastq.filtered/SRR1222430_1.filtered.fastq" ] || exit 126 +[ -f "data/fastq.filtered/SRR1222430_2.filtered.fastq" ] || exit 127 nb_PE1_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_1.filtered.fastq` nb_PE2_filtered=`grep -c "@" data/fastq.filtered/SRR1222430_2.filtered.fastq` -test $nb_PE1_filtered -eq 0 || exit 109 -test $nb_PE2_filtered -eq 0 || exit 110 +test $nb_PE1_filtered -eq 0 || exit 128 +test $nb_PE2_filtered -eq 0 || exit 129 # test that input fastq file names can be provided in command-line. echo " " echo "##################################################################################" echo "testing that input fastq file names can be provided in command line." -../src/rock -C 100 -c 1 -l 2 ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq >/dev/null || exit 111 +../src/rock -C 100 -c 1 -l 2 -p ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq >/dev/null || exit 130 -[ -f "klebsiella_100_1.rock.fq" ] || exit 112 -[ -f "klebsiella_100_2.rock.fq" ] || exit 113 -[ -f "test_single.rock.fq" ] || exit 114 -[ -f "test_single2.rock.fq" ] || exit 115 +[ -f "klebsiella_100_1.rock.fq" ] || exit 131 +[ -f "klebsiella_100_2.rock.fq" ] || exit 132 +[ -f "test_single.rock.fq" ] || exit 133 +[ -f "test_single2.rock.fq" ] || exit 134 # checking that output files were sorted in decreasing order of quality score. For that expect to have SRR122430.1.1 as 1rst record in filtered file. ret=`head -4 "klebsiella_100_1.rock.fq"|grep -c "SRR1222430.1.1"` -test $ret -eq 2 || exit 116 +test $ret -eq 2 || exit 135 echo "erasing test result files" -rm -f "klebsiella_100_1.rock.fq" || exit 117 -rm -f "klebsiella_100_2.rock.fq" || exit 118 -rm -f "test_single.rock.fq"|| exit 119 -rm -f "test_single2.rock.fq"|| exit 120 -#rm -f "klebsiella_100_1.undefined.fq" || exit 121 -#rm -f "klebsiella_100_2.undefined.fq" || exit 122 -#rm -f "test_single.undefined.fq" || exit 123 -#rm -f "test_single2.undefined.fq" || exit 124 +rm -f "klebsiella_100_1.rock.fq" || exit 136 +rm -f "klebsiella_100_2.rock.fq" || exit 137 +rm -f "test_single.rock.fq"|| exit 138 +rm -f "test_single2.rock.fq"|| exit 139 @@ -96,23 +140,20 @@ rm -f "test_single2.rock.fq"|| exit 120 echo " " echo "##################################################################################" echo "testing verbose mode" -../src/rock -C 100 -c 1 -l 2 -v ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq|grep "count-min sketch size (Gb)" >/dev/null || exit 125 +../src/rock -C 100 -c 1 -l 2 -v ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq|grep "count-min sketch size (Gb)" >/dev/null || exit 140 -../src/rock -C 100 -c 1 -v -n 1000 ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq|grep "expected false positive proba" >/dev/null || exit 126 +../src/rock -C 100 -c 1 -v -n 1000 ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq ${srcdir}/data/unit/test_single.fq ${srcdir}/data/unit/test_single2.fq|grep "expected false positive proba" >/dev/null || exit 141 echo "erasing test result files" -rm -f "klebsiella_100_1.rock.fq" || exit 127 -rm -f "klebsiella_100_2.rock.fq" || exit 128 -rm -f "test_single.rock.fq"|| exit 129 -rm -f "test_single2.rock.fq"|| exit 130 -#rm -f "klebsiella_100_1.undefined.fq" || exit 131 -#rm -f "klebsiella_100_2.undefined.fq" || exit 132 -#rm -f "test_single.undefined.fq" || exit 133 -#rm -f "test_single2.undefined.fq" || exit 134 +rm -f "klebsiella_100_1.rock.fq" || exit 142 +rm -f "klebsiella_100_2.rock.fq" || exit 143 +rm -f "test_single.rock.fq"|| exit 144 +rm -f "test_single2.rock.fq"|| exit 145 + echo " " echo "##################################################################################" -echo "testing rock with a quality score threshold for nucleotides." +echo "testing rock with a quality score threshold for nucleotides and PE processed separately." expected_diff1="304a305,308 \ > @SRR1222430.37 37 length=250 \ @@ -126,63 +167,115 @@ expected_diff2="> @SRR1222430.37 37 length=251 \ > A3>333BFA2FF4GBFFDGGCGED?FGEGGHGFFFEEG?AF13@50>///11B13@@1/>//B0?>////<//B/00??@/--:--.;:C000;:0/0009.-9:.00:-.;9/9...-;.--9@--9:////-9-9..////9/;//;9///.9-..--------..99.9.//////;-;--9-.////://9/9.;.-;-99-.//.;////-;?9;...9-9-----9;-.;.;/.-9.;/99=--;" mkdir tmp -../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 140 +echo "noNQ_Thres" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150 -../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 141 +echo "noNQ_Thres_very_low" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151 ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` -test $ret1 -eq 0 || exit 142 +echo "before exit 142" +test $ret1 -eq 0 || exit 152 ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` -test $ret2 -eq 0 || exit 143 +echo "before exit 143" +test $ret2 -eq 0 || exit 153 -../src/rock -C 100 -k 30 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 144 +echo "q 12" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 154 -../src/rock -C 100 -k 30 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 145 +echo "q 13" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 155 -ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.37"` -test $ret1 -eq 1 || exit 146 +ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.14"` +echo "before exit 146" +test $ret1 -eq 1 || exit 156 + +ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.24"` +echo "before exit 1461" +test $ret1 -eq 1 || exit 157 ret2=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "length"` -test $ret2 -eq 2 || exit 147 +echo "before exit 147" +test $ret2 -eq 4 || exit 158 -ret1=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "@SRR1222430.37"` -test $ret1 -eq 1 || exit 148 +ret1=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "@SRR1222430.14"` +echo "before exit 148" +test $ret1 -eq 1 || exit 159 + +ret1=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "@SRR1222430.24"` +echo "before exit 1481" +test $ret1 -eq 1 || exit 160 ret2=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "length"` -test $ret2 -eq 2 || exit 149 +echo "before exit 149" +test $ret2 -eq 4 || exit 161 rm -fr tmp echo " " echo "##################################################################################" -echo "testing ROCK with a quality score threshold for nucleotides and minimum number of valid k-mer to keep a read." +echo "testing rock with a quality score threshold for nucleotides and PE processed as single." + +expected_diff1="304a305,308 \ + > @SRR1222430.37 37 length=250 \ +> GCCTTTTCTTTTTCCAGGGAAAACCATCCAGGAGGAACTTTATTATGGCGATGTATGAAGTCGGTACCGTCACGGGTGCCGCGTCGCAGGCACGGGTGACAGGTGCGACAACAAAATGGTCACAGGAGGCGCTGGGGATACAGCCCGGGTCGATTCTGGTGGTCTACCGCAGCGGTAGTGCTGACCTGTATGCGATCAAATCCGTGGACAGCGACACGCAACTGACGCTGACCCGGAATATCACCACCGC \ +> +SRR1222430.37 37 length=250 \ +> AAAABFFFFFFFGCGFGGEFFCHHEEFHHGHHAFEAGAGGGFGGFHGFHGEEEFF5BDHHBDEGCEEHGAEGGFFGFCFHCGGC@E/EEEEHFEC@BBFFFG/GFEFDDCDHHGCC<?FGDDGHFFFGFFCGGG-.AGDGBGGHHGGG?ACGCFGG99B9;?C99B0CDGGCFF;DDFFFFFFFB99:;BBF/A.:9DFFF/F?EEFFFFFF>ADAFDFFFFFFE/ADADDFFFFFA;EEFF/BFFEEFF" + +expected_diff2="> @SRR1222430.37 37 length=251 \ +> GCTGGCCAGCTGGTTAGCAAACGACGAGGTGCTGGCGGGTTTAGCGGGAAAAATTGCGGAAGAGGCGCCGGGAAAAGCGGGGGTGTTATTACAGGTCTACGGCAGATGCGTGTCGCTGGCCGCGGCTTTTAGCGCCTACAAGTCTGGACTTCCCCGTCGGGGGGCAACCACAATTCACCCCGGCTGTATTCCACTCGGCCCCGGTGACCCTTTTGGTGTCGCCCCCGGACACCGTGCCCGTGCCCCGGTAC \ +> +SRR1222430.37 37 length=251 \ +> A3>333BFA2FF4GBFFDGGCGED?FGEGGHGFFFEEG?AF13@50>///11B13@@1/>//B0?>////<//B/00??@/--:--.;:C000;:0/0009.-9:.00:-.;9/9...-;.--9@--9:////-9-9..////9/;//;9///.9-..--------..99.9.//////;-;--9-.////://9/9.;.-;-99-.//.;////-;?9;...9-9-----9;-.;.;/.-9.;/99=--;" mkdir tmp -echo "../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" -../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 150 -echo "../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" -../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 151 +../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 170 + +../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 171 ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` -test $ret1 -eq 0 || exit 152 +test $ret1 -eq 0 || exit 172 ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` -test $ret2 -eq 0 || exit 153 +test $ret2 -eq 0 || exit 173 + +../src/rock -C 100 -k 30 -c 1 -l 2 -q 12 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_12.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 174 +../src/rock -C 100 -k 30 -c 1 -l 2 -q 13 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null ||exit 175 -#[ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 154 -#[ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 155 +ret1=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "@SRR1222430.37"` +test $ret1 -eq 1 || exit 176 + +ret2=`diff tmp/klebsiella_100_1_13_qual_thres.fq tmp/klebsiella_100_1_12_qual_thres.fq|grep -c "length"` +test $ret2 -eq 2 || exit 177 + +ret1=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "@SRR1222430.37"` +test $ret1 -eq 1 || exit 178 + +ret2=`diff tmp/klebsiella_100_2_13_qual_thres.fq tmp/klebsiella_100_2_12_qual_thres.fq|grep -c "length"` +test $ret2 -eq 2 || exit 179 + +rm -fr tmp + +echo " " +echo "##################################################################################" +echo "testing ROCK with a quality score threshold for nucleotides,minimum number of valid k-mer to keep a read and PE processed separately." + +mkdir tmp +echo "../src/rock -C 100 -k 30 -c 1 -p -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 180 +echo "../src/rock -C 100 -k 30 -c 1 -p -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -C 100 -k 30 -c 1 -l 2 -p -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 181 -#ret1=`cat tmp/klebsiella_100_1.undefined.fq|wc -l` -#ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` +ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` +test $ret1 -eq 0 || exit 182 +ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` +test $ret2 -eq 0 || exit 183 -#test $ret1 -eq 0 || exit 156 -#test $ret1 -eq 0 || exit 157 # All reads should be rejected. -echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" -../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 158 -[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 159 -[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 160 +echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -p -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -p -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 184 +[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 185 +[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 186 echo "both files are here" @@ -192,29 +285,22 @@ ret2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|wc -l` echo "ret1="$ret1 echo "ret2="$ret2 -test $ret1 -eq 0 || exit 161 -test $ret2 -eq 0 || exit 162 - -#[ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 159 -#[ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 160 +test $ret1 -eq 0 || exit 187 +test $ret2 -eq 0 || exit 188 -#ret1=`cat tmp/klebsiella_100_1.undefined.fq|wc -l` -#ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` -#test $ret1 -eq 400 || exit 161 -#test $ret1 -eq 400 || exit 162 -echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" -../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 163 +echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -p -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -p -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 189 -[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 164 -[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 165 +[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 190 +[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 191 ret1=`cat tmp/klebsiella_100_1_13_qual_thres.fq|wc -l` ret2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|wc -l` -test $ret1 -eq 264 || exit 166 -test $ret2 -eq 264 || exit 167 +test $ret1 -eq 264 || exit 192 +test $ret2 -eq 264 || exit 193 # check that reads that are kept are always the same. lst1=`cat tmp/klebsiella_100_1_13_qual_thres.fq|grep @S|cut -d ' ' -f 1|sort` @@ -222,401 +308,63 @@ lst2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|grep @S|cut -d ' ' -f 1|sort` lst_ref=`cat ${srcdir}/data/non_regression/ids_read_kept_q13m300.txt` -test "$lst1" = "$lst2" || exit 1671 -test "$lst_ref" = "$lst1" || exit 1672 +test "$lst1" = "$lst2" || exit 194 +test "$lst_ref" = "$lst1" || exit 195 -#[ -f "tmp/klebsiella_100_1.undefined.fq" ] || exit 164 -#[ -f "tmp/klebsiella_100_2.undefined.fq" ] || exit 165 +rm -fr tmp -#ret1=`cat tmp/klebsiella_100_1.undefined.fq|wc -l` -#ret2=`cat tmp/klebsiella_100_2.undefined.fq|wc -l` +echo "##################################################################################" +echo "testing ROCK with a quality score threshold for nucleotides,minimum number of valid k-mer to keep a read and PE processed as single." -#test $ret1 -eq 136 || exit 166 -#test $ret1 -eq 136 || exit 167 +mkdir tmp +echo "../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -C 100 -k 30 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_noNQ_Thres.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 200 +echo "../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -C 100 -k 30 -c 1 -l 2 -q 2 -m 5 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_very_low.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 201 -echo " " -echo "***##################################################################################" -#echo "testing ROCK with a quality score threshold for nucleotides and PE processed separately with strict filter " -#../src/rock -C 100 -c 1 -l 2 -q 2 -p 1 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq >/dev/null || exit 168 - -#[ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 169 -#[ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 170 - -# ret1=`cat tmp/klebsiella_5_1_bad_scores.undefined.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_2_bad_scores.undefined.fq|wc -l` -# -# test $ret1 -eq 8 || exit 171 -# test $ret2 -eq 8 || exit 171 -# -# ret1=`grep -c SRR1222430.1 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.1 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 172 -# test $ret2 -eq 2 || exit 172 -# -# ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 173 -# test $ret2 -eq 2 || exit 173 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 174 -# test $ret2 -eq 0 || exit 174 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 175 -# test $ret2 -eq 0 || exit 175 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 176 -# test $ret2 -eq 0 || exit 176 -# -# ret1=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# -# test $ret1 -eq 8 || exit 177 -# test $ret2 -eq 8 || exit 177 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 178 -# test $ret2 -eq 2 || exit 178 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 0 || exit 179 -# test $ret2 -eq 0 || exit 179 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 180 -# test $ret2 -eq 2 || exit 180 -# -# rm -f tmp/klebsiella_5_1_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_2_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_1_2_qual_thres.fq -# rm -f tmp/klebsiella_5_2_2_qual_thres.fq - - -# echo " " -# echo "##################################################################################" -# echo "testing ROCK with a quality score threshold for nucleotides, PE processed separately (strict filter) and -m option" -# ../src/rock -C 100 -c 1 -l 2 -q 2 -m 2 -p 1 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq >/dev/null || exit 281 -# -# [ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 182 -# [ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 182 -# -# ret1=`cat tmp/klebsiella_5_1_bad_scores.undefined.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_2_bad_scores.undefined.fq|wc -l` -# -# test $ret1 -eq 8 || exit 183 -# test $ret2 -eq 8 || exit 183 -# -# ret1=`grep -c SRR1222430.1 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.1 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 184 -# test $ret2 -eq 2 || exit 184 -# -# ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 185 -# test $ret2 -eq 2 || exit 185 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 186 -# test $ret2 -eq 0 || exit 186 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 187 -# test $ret2 -eq 0 || exit 187 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 188 -# test $ret2 -eq 0 || exit 188 -# -# ret1=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# -# test $ret1 -eq 8 || exit 189 -# test $ret2 -eq 8 || exit 189 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 190 -# test $ret2 -eq 2 || exit 190 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 0 || exit 191 -# test $ret2 -eq 0 || exit 191 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 192 -# test $ret2 -eq 2 || exit 192 -# -# rm -f tmp/klebsiella_5_1_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_2_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_1_2_qual_thres.fq -# rm -f tmp/klebsiella_5_2_2_qual_thres.fq -# - - -# echo " " -# echo "##################################################################################" -# echo "testing ROCK with no quality score threshold for nucleotides, no low filter and PE processed separately (strict filter)" -# -# ../src/rock -C 100 -c 0 -l 2 -q 0 -p 1 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq >/dev/null || exit 193 -# -# [ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 194 -# [ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 194 -# -# ret1=`cat tmp/klebsiella_5_1_bad_scores.undefined.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_2_bad_scores.undefined.fq|wc -l` -# -# test $ret1 -eq 0 || exit 195 -# test $ret2 -eq 0 || exit 195 -# -# ret1=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# -# test $ret1 -eq 20 || exit 196 -# test $ret2 -eq 20 || exit 196 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 197 -# test $ret2 -eq 2 || exit 197 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 198 -# test $ret2 -eq 2 || exit 198 -# -# ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 199 -# test $ret2 -eq 2 || exit 199 -# -# ret1=`grep -c SRR1222430.1 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.1 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 200 -# test $ret2 -eq 2 || exit 200 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 201 -# test $ret2 -eq 2 || exit 201 -# -# rm -f tmp/klebsiella_5_1_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_2_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_1_2_qual_thres.fq -# rm -f tmp/klebsiella_5_2_2_qual_thres.fq - - -# echo " " -# echo "##################################################################################" -# echo "testing ROCK with no quality score threshold for nucleotides, no low filter and PE processed separately (lenient filter). Results should be the same as with strict filter" -# -# ../src/rock -C 100 -c 0 -l 2 -q 0 -p 2 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq >/dev/null || exit 202 -# -# [ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 202 -# [ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 202 -# -# ret1=`cat tmp/klebsiella_5_1_bad_scores.undefined.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_2_bad_scores.undefined.fq|wc -l` -# -# test $ret1 -eq 0 || exit 203 -# test $ret2 -eq 0 || exit 203 -# -# ret1=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# ret2=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` -# -# test $ret1 -eq 20 || exit 204 -# test $ret2 -eq 20 || exit 204 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 205 -# test $ret2 -eq 2 || exit 205 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 206 -# test $ret2 -eq 2 || exit 206 -# -# ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 207 -# test $ret2 -eq 2 || exit 207 -# -# ret1=`grep -c SRR1222430.1 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.1 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 208 -# test $ret2 -eq 2 || exit 208 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 209 -# test $ret2 -eq 2 || exit 209 -# -# -# rm -f tmp/klebsiella_5_1_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_2_bad_scores.undefined.fq -# rm -f tmp/klebsiella_5_1_2_qual_thres.fq -# rm -f tmp/klebsiella_5_2_2_qual_thres.fq - - -# echo " " -# echo "##################################################################################" -# echo "testing ROCK with a quality score threshold for nucleotides, PE processed separately (lenient filter) and -m option" -# ../src/rock -C 100 -c 1 -l 2 -q 2 -m 2 -p 2 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated_lenient.txt ${srcdir}/data/fastq.raw/klebsiella_6_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_6_2_bad_scores.fq >/dev/null || exit 210 -# -# [ -f "tmp/klebsiella_6_1_bad_scores.undefined.fq" ] || exit 211 -# [ -f "tmp/klebsiella_6_2_bad_scores.undefined.fq" ] || exit 211 -# -# ret1=`cat tmp/klebsiella_6_1_bad_scores.undefined.fq|wc -l` -# ret2=`cat tmp/klebsiella_6_2_bad_scores.undefined.fq|wc -l` -# -# test $ret1 -eq 8 || exit 212 -# test $ret2 -eq 8 || exit 212 -# -# ret1=`grep -c SRR1222430.1 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.1 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 213 -# test $ret2 -eq 2 || exit 213 -# -# ret1=`grep -c SRR1222430.4 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.4 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 2 || exit 214 -# test $ret2 -eq 2 || exit 214 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 215 -# test $ret2 -eq 0 || exit 215 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 216 -# test $ret2 -eq 0 || exit 216 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 217 -# test $ret2 -eq 0 || exit 217 -# -# ret1=`grep -c SRR1222430.6 tmp/klebsiella_6_1_bad_scores.undefined.fq` -# ret2=`grep -c SRR1222430.6 tmp/klebsiella_6_2_bad_scores.undefined.fq` -# -# test $ret1 -eq 0 || exit 218 -# test $ret2 -eq 0 || exit 218 -# -# ret1=`cat tmp/klebsiella_6_1_2_qual_thres.fq|wc -l` -# ret2=`cat tmp/klebsiella_6_1_2_qual_thres.fq|wc -l` -# -# test $ret1 -eq 8 || exit 219 -# test $ret2 -eq 8 || exit 219 -# -# ret1=`grep -c SRR1222430.2 tmp/klebsiella_6_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.2 tmp/klebsiella_6_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 220 -# test $ret2 -eq 2 || exit 220 -# -# ret1=`grep -c SRR1222430.3 tmp/klebsiella_6_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.3 tmp/klebsiella_6_2_2_qual_thres.fq` -# -# test $ret1 -eq 0 || exit 221 -# test $ret2 -eq 0 || exit 221 -# -# ret1=`grep -c SRR1222430.5 tmp/klebsiella_6_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.5 tmp/klebsiella_6_2_2_qual_thres.fq` -# -# test $ret1 -eq 2 || exit 222 -# test $ret2 -eq 2 || exit 222 -# -# ret1=`grep -c SRR1222430.6 tmp/klebsiella_6_1_2_qual_thres.fq` -# ret2=`grep -c SRR1222430.6 tmp/klebsiella_6_2_2_qual_thres.fq` -# -# test $ret1 -eq 0 || exit 223 -# test $ret2 -eq 0 || exit 223 -# -# mv tmp/klebsiella_6_1_bad_scores.undefined.fq tmp/klebsiella_6_1_bad_scores.undefined.fq.lenient -# mv tmp/klebsiella_6_2_bad_scores.undefined.fq tmp/klebsiella_6_2_bad_scores.undefined.fq.lenient -# mv tmp/klebsiella_6_1_2_qual_thres.fq tmp/klebsiella_6_1_2_qual_thres.fq.lenient -# mv tmp/klebsiella_6_2_2_qual_thres.fq tmp/klebsiella_6_2_2_qual_thres.fq.lenient -# -# echo " Now use the scrict filter on the same data and check that the only difference is read SRR1222430.6 that should be kept by scrict filter" -# -# ../src/rock -C 100 -c 1 -l 2 -q 2 -m 2 -p 1 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated_lenient.txt ${srcdir}/data/fastq.raw/klebsiella_6_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_6_2_bad_scores.fq >/dev/null || exit 224 -# -# ret1=`diff tmp/klebsiella_6_1_bad_scores.undefined.fq tmp/klebsiella_6_1_bad_scores.undefined.fq.lenient|wc -l` -# test $ret1 -eq 0 ||exit 225 -# -# ret2=`diff tmp/klebsiella_6_2_bad_scores.undefined.fq tmp/klebsiella_6_2_bad_scores.undefined.fq.lenient|wc -l` -# test $ret2 -eq 0 ||exit 226 -# -# ret3=`diff tmp/klebsiella_6_1_2_qual_thres.fq tmp/klebsiella_6_1_2_qual_thres.fq.lenient|grep -c SRR1222430.6` -# test $ret3 -eq 2 ||exit 227 -# -# ret4=`diff tmp/klebsiella_6_1_2_qual_thres.fq tmp/klebsiella_6_1_2_qual_thres.fq.lenient|wc -l` -# test $ret4 -eq 5 || exit 228 # put 5 because there are 4 lines for 1 read and 1rts line that looks like: "9,12d8" -# -# ret5=`diff tmp/klebsiella_6_2_2_qual_thres.fq tmp/klebsiella_6_2_2_qual_thres.fq.lenient|grep -c SRR1222430.6` -# test $ret5 -eq 2 ||exit 229 -# -# ret6=`diff tmp/klebsiella_6_2_2_qual_thres.fq tmp/klebsiella_6_2_2_qual_thres.fq.lenient|wc -l` -# test $ret6 -eq 5 || exit 230 -# -# -# -# rm -f tmp/klebsiella_6_1_bad_scores.undefined.fq -# rm -f tmp/klebsiella_6_2_bad_scores.undefined.fq -# rm -f tmp/klebsiella_6_1_2_qual_thres.fq -# rm -f tmp/klebsiella_6_2_2_qual_thres.fq -# rm -fr tmp/klebsiella_6_1_bad_scores.undefined.fq.lenient -# rm -fr tmp/klebsiella_6_2_bad_scores.undefined.fq.lenient -# rm -fr tmp/klebsiella_6_1_2_qual_thres.fq.lenient -# rm -fr tmp/klebsiella_6_2_2_qual_thres.fq.lenient +ret1=`diff tmp/klebsiella_100_1_very_low_qual_thres.fq tmp/klebsiella_100_1_no_qual_thres.fq|wc -l` +test $ret1 -eq 0 || exit 202 +ret2=`diff tmp/klebsiella_100_2_very_low_qual_thres.fq tmp/klebsiella_100_2_no_qual_thres.fq|wc -l` +test $ret2 -eq 0 || exit 203 + +# All reads should be rejected. +echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 500 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 204 +[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 205 +[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 206 + +echo "both files are here" + +ret1=`cat tmp/klebsiella_100_1_13_qual_thres.fq|wc -l` +ret2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|wc -l` + +echo "ret1="$ret1 +echo "ret2="$ret2 + +test $ret1 -eq 0 || exit 161 +test $ret2 -eq 0 || exit 162 + + +echo "rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq" +../src/rock -k 30 -C 100 -c 1 -l 2 -q 13 -m 300 -o ${srcdir}/data/iofiles.args/output_files_NQ_Thres_13.txt ${srcdir}/data/fastq.raw/klebsiella_100_1.fq,${srcdir}/data/fastq.raw/klebsiella_100_2.fq >/dev/null || exit 207 + +[ -f "tmp/klebsiella_100_1_13_qual_thres.fq" ] || exit 208 +[ -f "tmp/klebsiella_100_2_13_qual_thres.fq" ] || exit 209 + +ret1=`cat tmp/klebsiella_100_1_13_qual_thres.fq|wc -l` +ret2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|wc -l` + +test $ret1 -eq 264 || exit 210 +test $ret2 -eq 264 || exit 211 +# check that reads that are kept are always the same. +lst1=`cat tmp/klebsiella_100_1_13_qual_thres.fq|grep @S|cut -d ' ' -f 1|sort` +lst2=`cat tmp/klebsiella_100_2_13_qual_thres.fq|grep @S|cut -d ' ' -f 1|sort` + +lst_ref=`cat ${srcdir}/data/non_regression/ids_read_kept_q13m300.txt` +test "$lst1" = "$lst2" || exit 212 +test "$lst_ref" = "$lst1" || exit 213 rm -fr tmp