Skip to content
Snippets Groups Projects
FqMainBackend.cpp 6.67 KiB
/*
 * FqMainProcessor.cpp
 *
 *  Created on: Jan 7, 2016
 *      Author: vlegrand
 */

#include <stdio.h>
//#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <stdexcept>
#include <sstream>
#include <string>
#include <iostream>

#define _FILE_OFFSET_BITS 64 // for portability reasons on 32bits systems.

//#define DEBUG

#ifdef DEBUG
#include <string.h>
#include <iostream>
using namespace std;
#endif

#include "FqMainBackend.h"
#include "srp.h"


FqMainBackend::FqMainBackend(srp * io_sr):FqBaseBackend() {
    p_auxFqProcessor=NULL;
    p_scoreReadStruct=io_sr;
}


void FqMainBackend::processFile(char * filename,unsigned char f_id) {
    FILE * fp;
    int st,s;
    int cnt,nread;
    long cur_offset;
    // int nb_read_oper=0;
    mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
    //cout<<"INT_MAX="<<INT_MAX<<endl;
    // cout<<filename<<endl;
    this->i_filename=filename;
    this->f_id=f_id;
    
    int f_single=open(filename,O_RDONLY,mode);
    if (f_single==-1) {
        err(errno,"cannot open file: %s.",filename);
    }

    fp=fdopen(f_single,"r");
    if (fp==NULL) {
        err(errno,"cannot open file: %s.",filename);
    }
    //cur_offset=ftell(fp);
    //cout<<"cur_offset="<<cur_offset<<endl;
    char* buf=(char *) malloc(FqBaseBackend::bufsize*sizeof(char));
    if (buf==NULL) {
        err(errno,"cannot allocate memory: %lu bytes.",bufsize);
    }
    /* for each read, we want : offset, filenb, total quality score. */
    while ((nread=read(f_single,buf,FqBaseBackend::bufsize))!=0) {
        // nb_read_oper++;
        //cout<<"BE1: read operation nbr: "<<nb_read_oper<<endl;
        //printf("going to call ftell on : %p \n",fp);
        //cur_offset=ftell(fp); // don't know why but it seems that this doesn't work under linux; always returns 0...
        cur_offset = lseek(f_single, 0, SEEK_CUR);
        //cout<<"cur_offset="<<cur_offset<<endl;
        processBuf((char *)buf,nread,f_id,cur_offset);
    }
    close(f_single);
    free(buf);
}


#ifdef DEBUG

#define init_debug 1
#define on_record_new 2
#define on_record_end 3
#define on_buf_end 4
#define on_line_end 5
#define on_store_read_id 6
#define on_record_end_pe 7

void FqMainBackend::debug_processBuf(int evt,int& cnt,int& nread,char * pchar,unsigned long& rstart_offset) { // put all the bebug stuff in a separate method so that processBuf is shorter and more "lisible".
    static int nb_start; // number of fq record start and stop found in current buffer.
    static int nb_stop;
    static int tot_nb_stop=0;
    static int tot_nb_start=0;
    static int idx_id;
    static int is_id=0;
    if (cnt==0) {
        nb_start=0;
        nb_stop=0;
    }

    switch(evt) {
        case on_record_new:
            cout<<"nread="<<nread<<endl;
            cout<<"cnt="<<cnt<<endl;
            cout<<"rstart_offset="<<rstart_offset<<endl;
            is_id=1;
            idx_id=0;
            nb_start++;
            break;
        case on_line_end:
            is_id=0;
            break;
        case on_record_end:
            nb_stop++;
            cout<<"just finished processing : "<<endl;
            cout<<current_id<<endl;
            cout<<"j obttained: rstart_offset/UINT_MAX="<<rstart_offset/UINT_MAX<<endl;
            break;
        case on_store_read_id:
            if (is_id==1) {
                if (*pchar!='/' and *pchar !=' ') current_id[idx_id]=*pchar;
                else current_id[idx_id]='\0';
                idx_id+=1;
            }
            break;
        case on_buf_end:
            std::cout<<" Main BE position in buffer "<<cnt<<endl;
            assert(cnt==nread);
            cout<<"Main BE Found "<<nb_start<<" and "<<nb_stop<<" start/stop of record in this buffer."<<endl;
            tot_nb_start+=nb_start;
            tot_nb_stop+=nb_stop;
            cout<<"Main BE Total cumulated number of start: "<<tot_nb_start<<" Total number of stop: "<<tot_nb_stop<<endl;
            break;
        case on_record_end_pe:
            cout<<p_auxFqProcessor->current_id<<endl;
            assert(strcmp(current_id,p_auxFqProcessor->current_id)==0);
            break;
        }
    }
#endif


void FqMainBackend::processBuf(char * buf,int nread,unsigned char f_id,unsigned long cur_offset) {
    int cnt=0;
    unsigned int s;
    static unsigned int st;
    static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */
    static int qual_score=0;
    static unsigned long rstart_offset;
#ifdef DEBUG
    debug_processBuf(init_debug,cnt,nread,NULL,rstart_offset);
#endif
    char * pchar=buf;
    while (cnt<=nread-1) {
        switch (*pchar){
            case k_read_id_start:
                if (qual_score) goto inc_score;
                else {
                    rstart_offset=cur_offset-nread+cnt;
#ifdef DEBUG
    debug_processBuf(on_record_new,cnt,nread,NULL,rstart_offset);
#endif
                    num_l_in_rec=1; }
                    break;
            case k_read_qual_start:
                qual_score=1;
                st=0;
                break;
            case '\n':
#ifdef DEBUG
    debug_processBuf(on_line_end,cnt,nread,pchar,rstart_offset);
#endif
                num_l_in_rec+=1;
                if (num_l_in_rec==5) {
                    qual_score=0;/* end of fastq record */
                    rpos rp=init_rpos(f_id,rstart_offset);
                    if (p_auxFqProcessor!=NULL) {
                        rinfo pe2info;
                        int eof=p_auxFqProcessor->getNextRead(&pe2info);
                        assert(!eof); // There should be the same number of reads in both PE files.
                        st+=pe2info.score;
                        unsigned long j=rstart_offset/UINT_MAX;
                        update_rpos(pe2info.f_id,pe2info.rstart_offset,j,&rp);
#ifdef DEBUG
    debug_processBuf(on_record_end_pe,cnt,nread,pchar,rstart_offset);
#endif
                    }
                    i_dim& ref_i_dim=(*p_scoreReadStruct)[st/K_SCORE_NORM_FACTOR];
                    k_dim& ref_k_dim=ref_i_dim[rstart_offset/UINT_MAX];
#ifdef DEBUG
    debug_processBuf(on_record_end,cnt,nread,pchar,rstart_offset);
#endif
                    ref_k_dim.push_back(rp);
                 }
                break;
            default:
#ifdef DEBUG
            store_id: debug_processBuf(on_store_read_id,cnt,nread,pchar,rstart_offset);
#endif
            inc_score:
                { if (qual_score==1) {
                    s=(int)*pchar;
                    s-=k_phred_32;
                    st+=s;
                    }
                }
                break;
        }
        pchar++;
        cnt++;
    }
#ifdef DEBUG
    debug_processBuf(on_buf_end,cnt,nread,pchar,rstart_offset);
#endif
}