diff --git a/src/FqAuxBackend.cpp b/src/FqAuxBackend.cpp index 7e78a1750a2d60535f918ee453aca5448927e31a..c237db37941aca75d728d6bcaa282ee2e95048f7 100644 --- a/src/FqAuxBackend.cpp +++ b/src/FqAuxBackend.cpp @@ -20,7 +20,7 @@ #include <iostream> using namespace std; -//#define DEBUG +// #define DEBUG int FqAuxBackend::getNextRead(rinfo * p_nr) { rinfo nr; @@ -95,7 +95,7 @@ int FqAuxBackend::processBuffer(rinfo * p_nr) { #ifdef DEBUG store_id: { if (is_id==1) { - if (*pchar!='\/') current_id[idx_id]=*pchar; + if (*pchar!='/' and *pchar!=' ') current_id[idx_id]=*pchar; else current_id[idx_id]='\0'; idx_id+=1; } diff --git a/src/FqBaseBackend.cpp b/src/FqBaseBackend.cpp index bd841c40d459ab29bb124e85d400086f92340f40..96d7f89846aad0979a50524e69ddab0dee70227e 100644 --- a/src/FqBaseBackend.cpp +++ b/src/FqBaseBackend.cpp @@ -140,6 +140,9 @@ int FqBaseBackend::getRead(const unsigned long& offset, char * fq_record) { if (res==-1) { // err(errno,"fseek problem when trying to retrieve dna string."); // add exception for debug +#ifdef DEBUG + cout<<"Couldn't get read at offset: "<<offset<<endl; +#endif throw errno; } nread=read(i_f_desc,fq_record,MAX_FQ_RECORD_LENGTH); diff --git a/src/FqMainBackend.cpp b/src/FqMainBackend.cpp index c68b2afe67b34a9be09a8c36932b064f5f9e022a..e43408c0a9f6a51f3551734a2444e36cfe145b9d 100644 --- a/src/FqMainBackend.cpp +++ b/src/FqMainBackend.cpp @@ -44,7 +44,7 @@ void FqMainBackend::processFile(char * filename,unsigned char f_id) { int st,s; int cnt,nread; long cur_offset; - int nb_read_oper=0; + // int nb_read_oper=0; mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; //cout<<"INT_MAX="<<INT_MAX<<endl; // cout<<filename<<endl; @@ -68,7 +68,7 @@ void FqMainBackend::processFile(char * filename,unsigned char f_id) { } /* for each read, we want : offset, filenb, total quality score. */ while ((nread=read(f_single,buf,FqBaseBackend::bufsize))!=0) { - nb_read_oper++; + // nb_read_oper++; //cout<<"BE1: read operation nbr: "<<nb_read_oper<<endl; //printf("going to call ftell on : %p \n",fp); //cur_offset=ftell(fp); // don't know why but it seems that this doesn't work under linux; always returns 0... @@ -81,16 +81,18 @@ void FqMainBackend::processFile(char * filename,unsigned char f_id) { } - -void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned long cur_offset) { - int cnt=0; - unsigned int s; - static unsigned int st; - static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */ - static int qual_score=0; - static unsigned long rstart_offset; #ifdef DEBUG - static int nb_start; + +#define init_debug 1 +#define on_record_new 2 +#define on_record_end 3 +#define on_buf_end 4 +#define on_line_end 5 +#define on_store_read_id 6 +#define on_record_end_pe 7 + +void FqMainBackend::debug_processBuf(int evt,int& cnt,int& nread,char * pchar,unsigned long& rstart_offset) { // put all the bebug stuff in a separate method so that processBuf is shorter and more "lisible". + static int nb_start; // number of fq record start and stop found in current buffer. static int nb_stop; static int tot_nb_stop=0; static int tot_nb_start=0; @@ -100,6 +102,58 @@ void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned nb_start=0; nb_stop=0; } + + switch(evt) { + case on_record_new: + cout<<"nread="<<nread<<endl; + cout<<"cnt="<<cnt<<endl; + cout<<"rstart_offset="<<rstart_offset<<endl; + is_id=1; + idx_id=0; + nb_start++; + break; + case on_line_end: + is_id=0; + break; + case on_record_end: + nb_stop++; + cout<<"just finished processing : "<<endl; + cout<<current_id<<endl; + break; + case on_store_read_id: + if (is_id==1) { + if (*pchar!='/' and *pchar !=' ') current_id[idx_id]=*pchar; + else current_id[idx_id]='\0'; + idx_id+=1; + } + break; + case on_buf_end: + std::cout<<" Main BE position in buffer "<<cnt<<endl; + assert(cnt==nread); + cout<<"Main BE Found "<<nb_start<<" and "<<nb_stop<<" start/stop of record in this buffer."<<endl; + tot_nb_start+=nb_start; + tot_nb_stop+=nb_stop; + cout<<"Main BE Total cumulated number of start: "<<tot_nb_start<<" Total number of stop: "<<tot_nb_stop<<endl; + break; + case on_record_end_pe: + cout<<p_auxFqProcessor->current_id<<endl; + assert(strcmp(current_id,p_auxFqProcessor->current_id)==0); + break; + } + } +#endif + + +void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned long cur_offset) { + int cnt=0; + unsigned int s; + static unsigned int st; + static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */ + static int qual_score=0; + static unsigned long rstart_offset; + int limit_case=0; +#ifdef DEBUG + debug_processBuf(init_debug,cnt,nread,NULL,rstart_offset); #endif char * pchar=buf; while (cnt<=nread-1) { @@ -107,40 +161,33 @@ void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned case k_read_id_start: if (qual_score) goto inc_score; else { - rstart_offset=cur_offset-nread+cnt; + rstart_offset=cur_offset-nread+cnt; #ifdef DEBUG - cout<<"nread="<<nread<<endl; - cout<<"cnt="<<cnt<<endl; - cout<<"rstart_offset="<<rstart_offset<<endl; - is_id=1; - idx_id=0; - nb_start++; + debug_processBuf(on_record_new,cnt,nread,NULL,rstart_offset); #endif - // rstart_offset=cur_offset-nread+cnt; - num_l_in_rec=1; } - break; + num_l_in_rec=1; } + break; case k_read_qual_start: qual_score=1; st=0; break; case '\n': #ifdef DEBUG - is_id=0; + debug_processBuf(on_line_end,cnt,nread,pchar,rstart_offset); #endif num_l_in_rec+=1; if (num_l_in_rec==5) { - // std::cout<<"BE1 position in buffer "<<cnt+1<<endl; //add 1 to compare value with BE2. qual_score=0;/* end of fastq record */ rpos rp=init_rpos(f_id,rstart_offset); +#ifdef DEBUG + debug_processBuf(on_record_end,cnt,nread,pchar,rstart_offset); +#endif if (p_auxFqProcessor!=NULL) { rinfo pe2info; int eof=p_auxFqProcessor->getNextRead(&pe2info); assert(!eof); // There should be the same number of reads in both PE files. - - // if (eof) cout<<"warning, reached eof in file PE2 after: "<<nb_read_PE1<<" reads in PE1"<<endl; st+=pe2info.score; unsigned long j=rstart_offset/UINT_MAX; - // if (pe2info.rstart_offset/UINT_MAX!=j) throw std::runtime_error("Inconsistency between offset values in PE files!"); if (pe2info.rstart_offset/UINT_MAX!=j) { std::stringstream ss; ss<<"Inconsistency between offset values in PE files! offset1="; @@ -149,34 +196,25 @@ void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned ss<<pe2info.rstart_offset; // throw std::runtime_error(ss.str()); // "limit case. For the moment, skip read, will see later how to handle it. std::cout<<ss.str()<<std::endl; + limit_case=1; } else { update_rpos(pe2info.f_id,pe2info.rstart_offset,j,&rp); } #ifdef DEBUG - nb_stop++; - cout<<"Just processed : "<<endl; - if (strcmp(current_id,"NS500443:42:H3MH2AFXX:1:11208:4026:13258")==0) { - cout<<"put breakpoint here"<<endl; - } - cout<<current_id<<endl; - cout<<p_auxFqProcessor->current_id<<endl; - assert(strcmp(current_id,p_auxFqProcessor->current_id)==0); + debug_processBuf(on_record_end_pe,cnt,nread,pchar,rstart_offset); #endif } - i_dim& ref_i_dim=(*p_scoreReadStruct)[st/K_SCORE_NORM_FACTOR]; - k_dim& ref_k_dim=ref_i_dim[rstart_offset/UINT_MAX]; - ref_k_dim.push_back(rp); } - + if (!limit_case) { // skip limit_case for the moment and see how it impacts perfs on loading data. + i_dim& ref_i_dim=(*p_scoreReadStruct)[st/K_SCORE_NORM_FACTOR]; + k_dim& ref_k_dim=ref_i_dim[rstart_offset/UINT_MAX]; + ref_k_dim.push_back(rp); + } else std::cout<<"skipping limit case read"<<std::endl; // debug stuff, TODO remove it. + limit_case=0; + } break; default: #ifdef DEBUG - store_id: - { if (is_id==1) { - if (*pchar!='\/') current_id[idx_id]=*pchar; - else current_id[idx_id]='\0'; - idx_id+=1; - } - } + store_id: debug_processBuf(on_store_read_id,cnt,nread,pchar,rstart_offset); #endif inc_score: { if (qual_score==1) { @@ -191,13 +229,7 @@ void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned cnt++; } #ifdef DEBUG - std::cout<<"BE1 position in buffer "<<cnt<<endl; - if (cnt==nread) { - cout<<"BE1 Found "<<nb_start<<" and "<<nb_stop<<" start/stop of record in this buffer."<<endl; - tot_nb_start+=nb_start; - tot_nb_stop+=nb_stop; - cout<<"BE1 Total cumulated number of start: "<<tot_nb_start<<" Total number of stop: "<<tot_nb_stop<<endl; - } + debug_processBuf(on_buf_end,cnt,nread,pchar,rstart_offset); #endif } diff --git a/src/FqMainBackend.h b/src/FqMainBackend.h index 6ef5565f62676b68c27bf31a79f0f979d9cb68f4..db7414e12ff2262db36f3d37bef0152cd4cd3cbe 100644 --- a/src/FqMainBackend.h +++ b/src/FqMainBackend.h @@ -21,6 +21,8 @@ class FqMainBackend : public FqBaseBackend { srp * p_scoreReadStruct; /* Where we store information about the reads. */ char current_id[50]; + void debug_processBuf(int evt,int& cnt,int& nread,char * pchar, unsigned long &); + friend void test_processInputFiles(); public: diff --git a/src/read_utils.cpp b/src/read_utils.cpp index 0639f4195a19fac9bb5a83f3dbfe4906220ce697..496e9ae3086ee9dd3b045146419118ce3b95ec7b 100644 --- a/src/read_utils.cpp +++ b/src/read_utils.cpp @@ -26,7 +26,12 @@ void getFqRecord(FqBaseBackend* fq_files_be[],const unsigned char& f_id,const unsigned long& offset, char * fq_record) { FqBaseBackend * be=fq_files_be[f_id-1]; - be->getRead(offset,fq_record); + try { + be->getRead(offset,fq_record); + } catch (int e) { + cout<<"couldn't retrieve read at offset: "<<offset<<endl; + throw e; + } }