diff --git a/src/CountMinSketch.hpp b/src/CountMinSketch.hpp index c42cc97d2596edf7aa39775d09d20b547f5d672d..00ac75710b3fcac989b424688f3a588e270e5684 100644 --- a/src/CountMinSketch.hpp +++ b/src/CountMinSketch.hpp @@ -380,9 +380,19 @@ template<typename T> int CountMinSketch<T>::addRead(const T_read_numericValues& int keep_r=isRCovBelowThres(read_val,kappa); if (keep_r) { readNumericValues::const_iterator it; - for (it=read_val.single_or_PE_val.begin();it!=read_val.single_or_PE_val.end();it++) { + // according to the intel profiler, we spend a lot of time in this loop. Try to use something slightly faster. + long const * p=(long const *) &read_val.single_or_PE_val[0]; + int cnt; + int stop=read_val.single_or_PE_val.size(); + for (cnt=0;cnt<stop;cnt++) { + this->addKMer(*p); + p=p+1; + } +/* + for (it=read_val.single_or_PE_val.begin();it!=read_val.single_or_PE_val.end();it++) { this->addKMer(*it); } +*/ } return keep_r; } diff --git a/src/FqBaseBackend.cpp b/src/FqBaseBackend.cpp index 08913346c4b951c0493c5fe05e3acbc1bf2aeb35..4ba07458c5456fb87f0abfab2d1ad0965b68b1de 100644 --- a/src/FqBaseBackend.cpp +++ b/src/FqBaseBackend.cpp @@ -236,6 +236,7 @@ void FqBaseBackend::keepCurFastqRecord(char * buf,const int& start_rec_in_buf,co void FqBaseBackend::onIncScore(T_fq_rec_info& rec_info,T_buf_info& buf_info,int& n) { unsigned int s=(int)*buf_info.pchar; + // printf("s=%u; qual_thres.nucl_score_threshold=%d \n",s,qual_thres.nucl_score_threshold); unsigned int remaining_nucl=rec_info.nb_nucleotides_in_read-rec_info.idx_nucl_in_read; if (s<=qual_thres.nucl_score_threshold) { // maybe TODO rewrite this with chained ternary operators once it is clear to see if it improves performances.Not useful: performance bottleneck is not here but in median calculation (42% of time approximatively for both filters). 
if (rec_info.idx_nucl_in_read<=qual_thres.k-1) { // error is found in the first k nucleotides diff --git a/src/ROCKparams.h b/src/ROCKparams.h index a7f1bd51c2752bfded8c2e6153e6dbae0fb35163..a12076794069bffe206c87b37676bb43d8a07e5a 100644 --- a/src/ROCKparams.h +++ b/src/ROCKparams.h @@ -14,6 +14,7 @@ #include <vector> #include <getopt.h> #include <iostream> +#include "FqConstants.h" #include "CountMinSketch.hpp" #include "main_utils.h" @@ -89,7 +90,8 @@ public: parms.lambda=0; parms.filter_PE_as_single=0; qual_thres.min_correct_k_mers_in_read=1; - qual_thres.nucl_score_threshold=10; + qual_thres.nucl_score_threshold=10+k_phred_32; + qual_thres.k=k; verbose_mode=0; cms_size=0; expected_collision_proba=0.0; diff --git a/src/ReadProcessor.cpp b/src/ReadProcessor.cpp index 69350b3756d0c89cff9c2f387b214859fbeb6e16..0ecaed4e5ee8e1b2140b399ac2513b3424919ee4 100644 --- a/src/ReadProcessor.cpp +++ b/src/ReadProcessor.cpp @@ -46,7 +46,7 @@ unsigned long ReadProcessor::kMerToNumber(char * k_m,unsigned long * p_prev) { return nbr; } - +// Tried to inline this but it is slightly slower. Put it back not inline. unsigned long ReadProcessor::kMerToNumberReverse(char * k_m,unsigned long * p_prev) { unsigned long nbr=0; unsigned long c; diff --git a/src/ReadProcessor.h b/src/ReadProcessor.h index 71fa4805f9003f48a47e2e330aa11a4b10551afb..ab39c6cfd526d15e5e261fef86d3158e6e43d805 100644 --- a/src/ReadProcessor.h +++ b/src/ReadProcessor.h @@ -83,6 +83,8 @@ public: this->k=k; init_mask(k); } + + // Do not inline this; it is slightly slower. unsigned long kMerToNumberReverse(char * k_m,unsigned long * p_prev);