Skip to content
Snippets Groups Projects
FqBaseBackend.h 4.48 KiB
/*
 * FqBaseBackend.h
 *
 *  Created on: Jan 20, 2016
 *      Author: vlegrand
 */

#ifndef FQBASEBACKEND_H_
#define FQBASEBACKEND_H_

#include <cstdio>
#include <stdlib.h>

#include "FqConstants.h"
#include "srp.h"
#include "ROCKparams.h"

//#define init_debug 1
/*
#define on_record_new 2
#define on_record_end 3
#define on_buf_end 4
#define on_line_end 5
#define on_store_read_id 6
#define on_record_end_pe 7*/ // keep that for later use (implementing the logger).

/*
 * Auxiliary structure for buffer processing.
 * Groups the bookkeeping needed while walking through a character buffer:
 * where the buffer starts, how much of it is filled, the current position
 * and where the record currently being processed begins.
 */
typedef struct {
    int cnt; // number of characters already processed in the buffer
    char * pchar; // points to the current character in the buffer
    int real_bufsize; // total number of characters in the buffer
    char * buf; // pointer to the start of the buffer
    char * p_start_cur_rec; // pointer to the start of the current record
}T_buf_info;

/*
 * Auxiliary structure for fastq parsing; gathers information about the fastq record
 * being parsed before it can be put in the srp data structure.
 */
typedef struct {
    unsigned long rstart_offset; // fastq record start offset in the file.
    int nb_k_mers_in_error; // number of k-mers containing nucleotides whose quality score is below the given threshold in PE1 or single read.
    int nb_k_mers_in_error_in_PE2; // number of k-mers containing nucleotides whose quality score is below the given threshold in PE2.
    unsigned int nb_nucleotides_in_read; // number of nucleotides in the read (single or PE1)
    unsigned int nb_nucleotides_in_read_PE2; // number of nucleotides in PE2.
    unsigned int st; // total read score (sum of the nucleotides' quality scores).
    unsigned int idx_nucl_in_read; // NOTE(review): presumably the index of the current nucleotide within the read - confirm against the parser.
}T_fq_rec_info;

// Builds and returns a T_buf_info by value from a character count and a buffer pointer.
// NOTE(review): the definition is not in this header; presumably nread is the number of
// characters available in buf and the returned structure points at the start of buf - confirm.
T_buf_info init_buf_info(int& nread,char * buf);

class FqBaseBackend {

    // I could compute offset myself since I am reading caracters from a text file... Wouldn't gain much; performance bottleneck is not here.

protected:
    static const size_t bufsize=6048000;

    // handling input
    char * i_filename;
    unsigned char f_id;
    int i_f_desc;

    // for writing output (filtered reads)
    char * o_filename;
    int o_f_desc;
    char * o_buf;
    char * pos_in_w_buf;
    // for writing undefined (reads that don't contain a sufficient number of correct k-mers)
    // correct k-mers are k-mers that contain only nucleotides with a quality score greater than a given threshold (default is 0).
    // expected minimum number of correct k-mers is provided by the user. Default is 1
    char * undef_filename;
    int undef_f_desc;

    static FasqQualThreshold qual_thres;

    char cur_fq_record[MAX_FQ_RECORD_LENGTH];

    void onIncScore(T_fq_rec_info& rec_info,T_buf_info& buf_info,int& n);
    // void debug_processBuf(int evt,const T_buf_info& buf_info,const unsigned long& rstart_offset);

    friend void processPEFiles(char *, unsigned char,char * , unsigned char,srp *,char *,char *, size_t);

    /* for testing only */
    int test_mode;
    size_t test_bufsize;

    void setTestMode(size_t new_buf_size) {
        test_mode=1;
        test_bufsize=new_buf_size;
    }

    friend void test_processInputFiles();
    friend void test_write_PE();

public:

    FqBaseBackend() {
        i_filename=NULL;
        i_f_desc=-1;
        f_id=0;
        o_f_desc=-1;
        o_filename=NULL;
        undef_f_desc=-1;
        undef_filename=NULL;

        o_buf=NULL;
        pos_in_w_buf=NULL;
        strcpy(cur_fq_record,"");
        test_mode=0;
        test_bufsize=0;
    }

    ~FqBaseBackend() {
        if (o_buf!=NULL) {
            free(o_buf);
            o_buf=NULL;
        }
    }

    void openInputFile(char * ficname, unsigned char id);
    void openInputFile();
    void closeInputFile();
    int getRead(const unsigned long&,char *);
    void setOutputFile(char * ofilename);
    void openOutputFile();
    void writeToOutput(const unsigned long&);
    void closeOutputFile();
    void setUndefFile(char * ficname);
    void openUndefFile();
    void writeStrToUndefFile(char * start_in_buf, int len);
    void writeToUndefFile(const T_buf_info& buf_info,const int& addCR=0);
    void closeUndefFile();

    static void setQualThreshold(const FasqQualThreshold& a_qual_thres){
        FqBaseBackend::qual_thres.min_correct_k_mers_in_read=a_qual_thres.min_correct_k_mers_in_read;
        FqBaseBackend::qual_thres.nucl_score_threshold=a_qual_thres.nucl_score_threshold;
        FqBaseBackend::qual_thres.k=a_qual_thres.k;
    }

    void openFile4Output(char * filename, int * f_desc);
    void keepCurFastqRecord(char * buf,const int& start_rec_in_buf,const int &nread);
};



#endif /* FQBASEBACKEND_H_ */