// FqBaseBackend.h -- authored by Veronique Legrand
/*
* FqBaseBackend.h
*
* Created on: Jan 20, 2016
* Author: vlegrand
*/
#ifndef FQBASEBACKEND_H_
#define FQBASEBACKEND_H_
#include <cstdio>
#include <stdlib.h>
#include "FqConstants.h"
#include "srp.h"
#include "ROCKparams.h"
//#define init_debug 1
/*
#define on_record_new 2
#define on_record_end 3
#define on_buf_end 4
#define on_line_end 5
#define on_store_read_id 6
#define on_record_end_pe 7*/ // keep that for later use (implementing the logger).
/*
 * Auxiliary structure for buffer processing: tracks a raw character buffer
 * and the current position of the parser inside it.
 */
typedef struct {
    int   cnt;              /* count of characters already consumed from the buffer */
    char *pchar;            /* current character in the buffer */
    int   real_bufsize;     /* total count of characters held in the buffer */
    char *buf;              /* first character of the buffer */
    char *p_start_cur_rec;  /* first character of the record currently being processed */
} T_buf_info;
/*
 * Auxiliary structure for fastq parsing: gathers information about the fastq
 * record being parsed before it can be put in the srp data structure.
 */
typedef struct {
    unsigned long rstart_offset;               /* offset of the start of the fq record in the file */
    int           nb_k_mers_in_error;          /* k-mers containing nucleotides whose quality score is
                                                  below the given threshold (PE1 or single read) */
    int           nb_k_mers_in_error_in_PE2;   /* same count, for PE2 */
    unsigned int  nb_nucleotides_in_read;      /* nucleotides in the read (single or PE1) */
    unsigned int  nb_nucleotides_in_read_PE2;  /* nucleotides in PE2 */
    unsigned int  st;                          /* total read score (sum of the nucleotides' quality scores) */
    unsigned int  idx_nucl_in_read;            /* NOTE(review): presumably the index of the current
                                                  nucleotide within the read -- confirm against the parser */
} T_fq_rec_info;
/*
 * Builds a T_buf_info from a character buffer.
 * nread is taken by non-const reference and buf by pointer; the definition is
 * not in this header.
 * NOTE(review): presumably nread is the number of characters read into buf and
 * ends up in real_bufsize -- confirm against the implementation.
 */
T_buf_info init_buf_info(int& nread,char * buf);
/*
 * Common state and file-handling interface for fastq backends: one input
 * file, one output file for filtered reads and one "undefined" file.
 *
 * Only the constructor, destructor, setTestMode() and setQualThreshold() are
 * defined here; every other member is defined outside this header, so the
 * short notes on them restate what the signatures suggest and should be
 * confirmed against the implementation.
 *
 * NOTE(review): the destructor frees o_buf, yet the class keeps its
 * compiler-generated copy constructor and assignment operator; copying an
 * instance whose o_buf is set would lead to a double free. The destructor is
 * also non-virtual. Both are left as-is to keep the interface unchanged.
 */
class FqBaseBackend {
    // I could compute offset myself since I am reading characters from a text file...
    // Wouldn't gain much; performance bottleneck is not here.

protected:
    static const size_t bufsize=6048000;

    // handling input
    char * i_filename;
    unsigned char f_id;
    int i_f_desc;

    // for writing output (filtered reads)
    char * o_filename;
    int o_f_desc;
    char * o_buf;           // released with free() in the destructor
    char * pos_in_w_buf;

    // for writing undefined (reads that don't contain a sufficient number of correct k-mers)
    // correct k-mers are k-mers that contain only nucleotides with a quality score greater
    // than a given threshold (default is 0).
    // expected minimum number of correct k-mers is provided by the user. Default is 1
    char * undef_filename;
    int undef_f_desc;

    // Quality thresholds shared by every backend instance (see setQualThreshold).
    static FasqQualThreshold qual_thres;

    char cur_fq_record[MAX_FQ_RECORD_LENGTH];

    void onIncScore(T_fq_rec_info& rec_info,T_buf_info& buf_info,int& n);
    // void debug_processBuf(int evt,const T_buf_info& buf_info,const unsigned long& rstart_offset);

    friend void processPEFiles(char *, unsigned char,char * , unsigned char,srp *,char *,char *, size_t);

    /* for testing only */
    int test_mode;
    size_t test_bufsize;

    // Switches the backend to test mode and records the buffer size to use in that mode.
    void setTestMode(size_t new_buf_size) {
        test_mode=1;
        test_bufsize=new_buf_size;
    }

    friend void test_processInputFiles();
    friend void test_write_PE();

public:
    // Starts with no file names, all descriptors at -1, no write buffer,
    // an empty current record and test mode off.
    // Initializers are listed in member declaration order.
    FqBaseBackend()
        : i_filename(NULL),
          f_id(0),
          i_f_desc(-1),
          o_filename(NULL),
          o_f_desc(-1),
          o_buf(NULL),
          pos_in_w_buf(NULL),
          undef_filename(NULL),
          undef_f_desc(-1),
          test_mode(0),
          test_bufsize(0) {
        // Same effect as strcpy(cur_fq_record,"") without requiring <cstring>,
        // which this header does not include.
        cur_fq_record[0]='\0';
    }

    // Releases the write buffer; free(NULL) is a no-op so no guard is needed.
    ~FqBaseBackend() {
        free(o_buf);
        o_buf=NULL;
    }

    // Input file handling.
    void openInputFile(char * ficname, unsigned char id);
    void openInputFile();
    void closeInputFile();
    int getRead(const unsigned long&,char *);

    // Output (filtered reads) file handling.
    void setOutputFile(char * ofilename);
    void openOutputFile();
    void writeToOutput(const unsigned long&);
    void closeOutputFile();

    // "Undefined" reads file handling.
    void setUndefFile(char * ficname);
    void openUndefFile();
    void writeStrToUndefFile(char * start_in_buf, int len);
    void writeToUndefFile(const T_buf_info& buf_info,const int& addCR=0);
    void closeUndefFile();

    // Copies min_correct_k_mers_in_read, nucl_score_threshold and k from
    // a_qual_thres into the class-wide qual_thres.
    static void setQualThreshold(const FasqQualThreshold& a_qual_thres){
        FqBaseBackend::qual_thres.min_correct_k_mers_in_read=a_qual_thres.min_correct_k_mers_in_read;
        FqBaseBackend::qual_thres.nucl_score_threshold=a_qual_thres.nucl_score_threshold;
        FqBaseBackend::qual_thres.k=a_qual_thres.k;
    }

    void openFile4Output(char * filename, int * f_desc);
    void keepCurFastqRecord(char * buf,const int& start_rec_in_buf,const int &nread);
};
#endif /* FQBASEBACKEND_H_ */