From b7fa30b188d38b024356ce7e827f40a620da32dd Mon Sep 17 00:00:00 2001 From: Veronique Legrand <vlegrand@pasteur.fr> Date: Fri, 14 Apr 2017 09:47:15 +0200 Subject: [PATCH] did some optimization for rock 1.6; still need to debug some thing --- src/CountMinSketch.hpp | 12 +++++++++++- src/FqBaseBackend.cpp | 1 + src/ROCKparams.h | 4 +++- src/ReadProcessor.cpp | 2 +- src/ReadProcessor.h | 2 ++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/CountMinSketch.hpp b/src/CountMinSketch.hpp index c42cc97..00ac757 100644 --- a/src/CountMinSketch.hpp +++ b/src/CountMinSketch.hpp @@ -380,9 +380,19 @@ template<typename T> int CountMinSketch<T>::addRead(const T_read_numericValues& int keep_r=isRCovBelowThres(read_val,kappa); if (keep_r) { readNumericValues::const_iterator it; - for (it=read_val.single_or_PE_val.begin();it!=read_val.single_or_PE_val.end();it++) { + // according to the intel profiler, we spend a lot of time in this loop. Try to use something slightly faster. + long const * p=(long const *) &read_val.single_or_PE_val[0]; + int cnt; + int stop=read_val.single_or_PE_val.size(); + for (cnt=0;cnt<stop;cnt++) { + this->addKMer(*p); + p=p+1; + } +/* + for (it=read_val.single_or_PE_val.begin();it!=read_val.single_or_PE_val.end();it++) { this->addKMer(*it); } +*/ } return keep_r; } diff --git a/src/FqBaseBackend.cpp b/src/FqBaseBackend.cpp index 0891334..4ba0745 100644 --- a/src/FqBaseBackend.cpp +++ b/src/FqBaseBackend.cpp @@ -236,6 +236,7 @@ void FqBaseBackend::keepCurFastqRecord(char * buf,const int& start_rec_in_buf,co void FqBaseBackend::onIncScore(T_fq_rec_info& rec_info,T_buf_info& buf_info,int& n) { unsigned int s=(int)*buf_info.pchar; + // printf("s=%u; qual_thres.nucl_score_threshold=%d \n",s,qual_thres.nucl_score_threshold); unsigned int remaining_nucl=rec_info.nb_nucleotides_in_read-rec_info.idx_nucl_in_read; if (s<=qual_thres.nucl_score_threshold) { // maybe TODO rewrite this with chained ternary operators once it is clear to see if it 
improves performances.Not useful: performance bottleneck is not here but in median calculation (42% of time approximatively for both filters). if (rec_info.idx_nucl_in_read<=qual_thres.k-1) { // error is found in the first k nucleotides diff --git a/src/ROCKparams.h b/src/ROCKparams.h index a7f1bd5..a120767 100644 --- a/src/ROCKparams.h +++ b/src/ROCKparams.h @@ -14,6 +14,7 @@ #include <vector> #include <getopt.h> #include <iostream> +#include "FqConstants.h" #include "CountMinSketch.hpp" #include "main_utils.h" @@ -89,7 +90,8 @@ public: parms.lambda=0; parms.filter_PE_as_single=0; qual_thres.min_correct_k_mers_in_read=1; - qual_thres.nucl_score_threshold=10; + qual_thres.nucl_score_threshold=10+k_phred_32; + qual_thres.k=k; verbose_mode=0; cms_size=0; expected_collision_proba=0.0; diff --git a/src/ReadProcessor.cpp b/src/ReadProcessor.cpp index 69350b3..0ecaed4 100644 --- a/src/ReadProcessor.cpp +++ b/src/ReadProcessor.cpp @@ -46,7 +46,7 @@ unsigned long ReadProcessor::kMerToNumber(char * k_m,unsigned long * p_prev) { return nbr; } - +// Tried to inline this but it is slightly slower. Put it back not inline. unsigned long ReadProcessor::kMerToNumberReverse(char * k_m,unsigned long * p_prev) { unsigned long nbr=0; unsigned long c; diff --git a/src/ReadProcessor.h b/src/ReadProcessor.h index 71fa480..ab39c6c 100644 --- a/src/ReadProcessor.h +++ b/src/ReadProcessor.h @@ -83,6 +83,8 @@ public: this->k=k; init_mask(k); } + + // Do not inline this; the inlined version is slightly slower. unsigned long kMerToNumberReverse(char * k_m,unsigned long * p_prev); -- GitLab