-
Veronique Legrand authoredVeronique Legrand authored
FqMainBackend.cpp 6.67 KiB
/*
* FqMainProcessor.cpp
*
* Created on: Jan 7, 2016
* Author: vlegrand
*/
#include <stdio.h>
//#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <stdexcept>
#include <sstream>
#include <string>
#include <iostream>
#define _FILE_OFFSET_BITS 64 // for portability reasons on 32bits systems.
//#define DEBUG
#ifdef DEBUG
#include <string.h>
#include <iostream>
using namespace std;
#endif
#include "FqMainBackend.h"
#include "srp.h"
FqMainBackend::FqMainBackend(srp * io_sr):FqBaseBackend() {
p_auxFqProcessor=NULL;
p_scoreReadStruct=io_sr;
}
void FqMainBackend::processFile(char * filename,unsigned char f_id) {
FILE * fp;
int st,s;
int cnt,nread;
long cur_offset;
// int nb_read_oper=0;
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
//cout<<"INT_MAX="<<INT_MAX<<endl;
// cout<<filename<<endl;
this->i_filename=filename;
this->f_id=f_id;
int f_single=open(filename,O_RDONLY,mode);
if (f_single==-1) {
err(errno,"cannot open file: %s.",filename);
}
fp=fdopen(f_single,"r");
if (fp==NULL) {
err(errno,"cannot open file: %s.",filename);
}
//cur_offset=ftell(fp);
//cout<<"cur_offset="<<cur_offset<<endl;
char* buf=(char *) malloc(FqBaseBackend::bufsize*sizeof(char));
if (buf==NULL) {
err(errno,"cannot allocate memory: %lu bytes.",bufsize);
}
/* for each read, we want : offset, filenb, total quality score. */
while ((nread=read(f_single,buf,FqBaseBackend::bufsize))!=0) {
// nb_read_oper++;
//cout<<"BE1: read operation nbr: "<<nb_read_oper<<endl;
//printf("going to call ftell on : %p \n",fp);
//cur_offset=ftell(fp); // don't know why but it seems that this doesn't work under linux; always returns 0...
cur_offset = lseek(f_single, 0, SEEK_CUR);
//cout<<"cur_offset="<<cur_offset<<endl;
processBuf((char *)buf,nread,f_id,cur_offset);
}
close(f_single);
free(buf);
}
#ifdef DEBUG
#define init_debug 1
#define on_record_new 2
#define on_record_end 3
#define on_buf_end 4
#define on_line_end 5
#define on_store_read_id 6
#define on_record_end_pe 7
void FqMainBackend::debug_processBuf(int evt,int& cnt,int& nread,char * pchar,unsigned long& rstart_offset) { // put all the bebug stuff in a separate method so that processBuf is shorter and more "lisible".
static int nb_start; // number of fq record start and stop found in current buffer.
static int nb_stop;
static int tot_nb_stop=0;
static int tot_nb_start=0;
static int idx_id;
static int is_id=0;
if (cnt==0) {
nb_start=0;
nb_stop=0;
}
switch(evt) {
case on_record_new:
cout<<"nread="<<nread<<endl;
cout<<"cnt="<<cnt<<endl;
cout<<"rstart_offset="<<rstart_offset<<endl;
is_id=1;
idx_id=0;
nb_start++;
break;
case on_line_end:
is_id=0;
break;
case on_record_end:
nb_stop++;
cout<<"just finished processing : "<<endl;
cout<<current_id<<endl;
cout<<"j obttained: rstart_offset/UINT_MAX="<<rstart_offset/UINT_MAX<<endl;
break;
case on_store_read_id:
if (is_id==1) {
if (*pchar!='/' and *pchar !=' ') current_id[idx_id]=*pchar;
else current_id[idx_id]='\0';
idx_id+=1;
}
break;
case on_buf_end:
std::cout<<" Main BE position in buffer "<<cnt<<endl;
assert(cnt==nread);
cout<<"Main BE Found "<<nb_start<<" and "<<nb_stop<<" start/stop of record in this buffer."<<endl;
tot_nb_start+=nb_start;
tot_nb_stop+=nb_stop;
cout<<"Main BE Total cumulated number of start: "<<tot_nb_start<<" Total number of stop: "<<tot_nb_stop<<endl;
break;
case on_record_end_pe:
cout<<p_auxFqProcessor->current_id<<endl;
assert(strcmp(current_id,p_auxFqProcessor->current_id)==0);
break;
}
}
#endif
void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned long cur_offset) {
int cnt=0;
unsigned int s;
static unsigned int st;
static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */
static int qual_score=0;
static unsigned long rstart_offset;
#ifdef DEBUG
debug_processBuf(init_debug,cnt,nread,NULL,rstart_offset);
#endif
char * pchar=buf;
while (cnt<=nread-1) {
switch (*pchar){
case k_read_id_start:
if (qual_score) goto inc_score;
else {
rstart_offset=cur_offset-nread+cnt;
#ifdef DEBUG
debug_processBuf(on_record_new,cnt,nread,NULL,rstart_offset);
#endif
num_l_in_rec=1; }
break;
case k_read_qual_start:
qual_score=1;
st=0;
break;
case '\n':
#ifdef DEBUG
debug_processBuf(on_line_end,cnt,nread,pchar,rstart_offset);
#endif
num_l_in_rec+=1;
if (num_l_in_rec==5) {
qual_score=0;/* end of fastq record */
rpos rp=init_rpos(f_id,rstart_offset);
if (p_auxFqProcessor!=NULL) {
rinfo pe2info;
int eof=p_auxFqProcessor->getNextRead(&pe2info);
assert(!eof); // There should be the same number of reads in both PE files.
st+=pe2info.score;
unsigned long j=rstart_offset/UINT_MAX;
update_rpos(pe2info.f_id,pe2info.rstart_offset,j,&rp);
#ifdef DEBUG
debug_processBuf(on_record_end_pe,cnt,nread,pchar,rstart_offset);
#endif
}
i_dim& ref_i_dim=(*p_scoreReadStruct)[st/K_SCORE_NORM_FACTOR];
k_dim& ref_k_dim=ref_i_dim[rstart_offset/UINT_MAX];
#ifdef DEBUG
debug_processBuf(on_record_end,cnt,nread,pchar,rstart_offset);
#endif
ref_k_dim.push_back(rp);
}
break;
default:
#ifdef DEBUG
store_id: debug_processBuf(on_store_read_id,cnt,nread,pchar,rstart_offset);
#endif
inc_score:
{ if (qual_score==1) {
s=(int)*pchar;
s-=k_phred_32;
st+=s;
}
}
break;
}
pchar++;
cnt++;
}
#ifdef DEBUG
debug_processBuf(on_buf_end,cnt,nread,pchar,rstart_offset);
#endif
}