// Skip to content
// Snippets Groups Projects
// unit_test_fqreader.cpp 9.82 KiB
/*
 * unit_test_fqreader.cpp
 *
 *  Created on: Dec 8, 2015
 *      Author: vlegrand
 *      unit and non regression testing for the fqreader component.
 *      Keep using assert for the moment, don't want to add a dependency on boost (or any other test framework) just for the tests.
 */
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include "rock_commons.h"
#include "FqConstants.h"
#include "srp.h"
#include "FqMainBackend.h"
#include "fqreader.h"

// NOTE: the case where the '@' character is inside the quality score is covered by test_processPEfilesWithA.

using namespace std;

void test_processSingleFile() {
    //printf("MAX_UINT=%u \n",UINT_MAX);
    srp sr;
    unsigned char f_id=1;
    processSingleFile((char *) "../test/data/unit/test_single.fq",f_id,&sr);
    srp::reverse_iterator rit;
    i_dim::iterator it_offs;
    k_dim::iterator it_struct;

    int cnt_read=0;

    for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores).
        //cout << "score="<<rit->first<<endl;
        unsigned long score=rit->first;
        assert(score==5);
        for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) {
            unsigned long offset_quotient=it_offs->first;
            cout<<"offset_quotient="<<offset_quotient<<endl;
	    assert(offset_quotient==0);
            for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) {
                unsigned char fid_stored=it_struct->fileid;
                assert(fid_stored >>4==f_id);
                if (cnt_read==0) assert(it_struct->read_a1==0);
                if (cnt_read==1) assert(it_struct->read_a1==350);
                if (cnt_read==2) assert(it_struct->read_a1==699);
                if (cnt_read==3) assert(it_struct->read_a1==1049);
                if (cnt_read==4) assert(it_struct->read_a1==1400);
                if (cnt_read==5) assert(it_struct->read_a1==1751);
                cnt_read++;
                /*int tmp=fid_stored >>4;
                cout<<" fileid="<<tmp<<" read_a1="<<it_struct->read_a1<<endl;*/
            }
        }
    }
    assert(cnt_read==6);
}

/*
 * Test case with other data than those used by the other tests.
 * Quality score contains the '@' character (usually the start of a fastq record),
 * reads are longer,
 * id and '+' line contain additional information.
 *
 * Processes the klebsiella PE1/PE2 files with file ids 3 and 4, then walks sr by
 * decreasing score. For every record the high nibble of fileid must be the PE1 id
 * and the low nibble the PE2 id; every offset quotient must be 0.
 * Spot checks are made on records 0, 3 and 6 (in traversal order) and 10 records
 * are expected in total.
 */
void test_processPEfilesWithA() {

    char * fq_3_test=(char *) "../test/data/unit/klebsiella_PE1.fq";
    char * fq_4_test=(char *) "../test/data/unit/klebsiella_PE2.fq";

    unsigned char f_id3=3;
    unsigned char f_id4=4;

    srp sr;
    processPEFiles(fq_3_test, f_id3,fq_4_test, f_id4,&sr);
    srp::reverse_iterator rit;
    i_dim::iterator it_offs;
    k_dim::iterator it_struct;
    int cnt_read=0;

    unsigned char masque=0x0F; // keeps the 4 low bits of fileid (id of the PE2 file).

    for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores).
        cout << "score="<<rit->first<<endl;
        unsigned long score=rit->first;
        for (it_offs=rit->second.begin();it_offs!=rit->second.end();++it_offs) {
            unsigned long offset_quotient=it_offs->first;
            assert(offset_quotient==0);
            for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();++it_struct) {
                unsigned char fid_stored=it_struct->fileid;
                assert(fid_stored >>4==f_id3);
                assert((fid_stored &masque)==f_id4);
                if (cnt_read==0) {
                    assert(it_struct->read_a1==0);
                    // This line used to repeat the read_a1 check, leaving read_a2 unverified.
                    // NOTE(review): read_a2 looks like an offset relative to read_a1 (0 and -2
                    // are asserted below), so 0 is expected for the record that starts both
                    // files; confirm against the test data.
                    assert(it_struct->read_a2==0);
                }
                if (cnt_read==3) {
                    assert(score==19);
                    assert(it_struct->read_a1==1114);
                    assert(it_struct->read_a2==0);
                }
                if (cnt_read==6) {
                    assert(score==18);
                    assert(it_struct->read_a1==558);
                    assert(it_struct->read_a2==-2);
                }
                cnt_read++;

                // trace both file ids and both read values of the record just checked.
                int tmp1=fid_stored >>4;
                int tmp2=fid_stored &masque;
                cout<<" fileid1="<<tmp1<<" read_a1="<<it_struct->read_a1<<endl;
                cout<<" fileid2="<<tmp2<<" read_a2="<<it_struct->read_a2<<endl;
            }
        }
    }
    assert(cnt_read==10);

}

/*
 * Processes the test_PE1/test_PE2 pair with file ids 1 and 2, then walks sr by
 * decreasing score.
 * The score is checked once per score entry, according to the number of records
 * already seen (10 while 0 or 1 record was seen, 9 once 2 were seen).
 * For every record the high nibble of fileid must be the PE1 id and the low nibble
 * the PE2 id; read_a1/read_a2 are checked for the 3 expected records.
 */
void test_processPEFiles() {
    char * fq_1_test=(char *) "../test/data/unit/test_PE1.fq";
    char * fq_2_test=(char *) "../test/data/unit/test_PE2.fq";

    unsigned char f_id1=1;
    unsigned char f_id2=2;

    srp sr;

    processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr);
    srp::reverse_iterator rit;
    i_dim::iterator it_offs;
    k_dim::iterator it_struct;
    int cnt_read=0;
    unsigned char masque=0x0F; // keeps the 4 low bits of fileid (id of the PE2 file).

    for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores).
        unsigned long score=rit->first;
        if (cnt_read==0 || cnt_read==1) assert(score==10);
        if (cnt_read==2) assert(score==9);
        for (it_offs=rit->second.begin();it_offs!=rit->second.end();++it_offs) {
            unsigned long offset_quotient=it_offs->first;
            assert(offset_quotient==0);
            for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();++it_struct) {
                unsigned char fid_stored=it_struct->fileid;
                assert(fid_stored >>4==f_id1);
                assert((fid_stored &masque)==f_id2);
                if (cnt_read==0) {
                    assert(it_struct->read_a1==0);
                    // This line used to repeat the read_a1 check, leaving read_a2 unverified.
                    // NOTE(review): the other records of this test all expect read_a2==0, and
                    // this record starts both files; confirm against the test data.
                    assert(it_struct->read_a2==0);
                }
                if (cnt_read==1) {
                    std::cout<<it_struct->read_a1<<" "<<it_struct->read_a2;
                    assert(it_struct->read_a1==349);
                    assert(it_struct->read_a2==0);
                }
                if (cnt_read==2) {
                    std::cout<<it_struct->read_a1<<" "<<it_struct->read_a2;
                    assert(it_struct->read_a1==698);
                    assert(it_struct->read_a2==0);
                }
                cnt_read++;

                // trace both file ids and both read values of the record just checked.
                int tmp1=fid_stored >>4;
                int tmp2=fid_stored &masque;
                cout<<" fileid1="<<tmp1<<" read_a1="<<it_struct->read_a1<<endl;
                cout<<" fileid2="<<tmp2<<" read_a2="<<it_struct->read_a2<<endl;
            }
        }
    }
    assert(cnt_read==3);
}


/*
 * Shared checker for the tests that load the PE pair plus the single-end file.
 * Walks sr by decreasing score. The expected score of each score entry depends on
 * the number of records counted before reaching it: 10 for 0 or 1 record,
 * 9 for 2 records, 5 afterwards. Every offset quotient must be 0 and 9 records
 * must be found in total.
 */
void check_processAIFilesResults(srp& sr) {
    int nb_records=0;

    srp::reverse_iterator score_it=sr.rbegin();
    while (score_it!=sr.rend()) {
        unsigned long cur_score=score_it->first;
        switch (nb_records) {
            case 0:
            case 1:
                assert(cur_score==10);
                break;
            case 2:
                assert(cur_score==9);
                break;
            default:
                assert(cur_score==5);
                break;
        }

        for (i_dim::iterator offs_it=score_it->second.begin(); offs_it!=score_it->second.end(); ++offs_it) {
            unsigned long quotient=offs_it->first;
            assert(quotient==0);
            // only the number of records matters here, not their content.
            for (k_dim::iterator rec_it=offs_it->second.begin(); rec_it!=offs_it->second.end(); ++rec_it) {
                ++nb_records;
            }
        }
        ++score_it;
    }
    assert(nb_records==9);
}

/*
 * Fills one srp with the PE pair (file ids 1 and 2) and then with the single-end
 * file (file id 3), and hands the result to check_processAIFilesResults.
 */
void test_processAllFiles() {
    srp sr;

    // paired-end input goes in first.
    char * pe1_path=(char *) "../test/data/unit/test_PE1.fq";
    char * pe2_path=(char *) "../test/data/unit/test_PE2.fq";
    unsigned char pe1_id=1;
    unsigned char pe2_id=2;
    processPEFiles(pe1_path, pe1_id,pe2_path, pe2_id,&sr);

    // then the single-end input is added to the same structure.
    char * single_path=(char *) "../test/data/unit/test_single.fq";
    unsigned char single_id=3;
    processSingleFile(single_path,single_id,&sr);

    check_processAIFilesResults(sr);
}


/*
 * Tests the higher level entry point processInputFiles with one single-end file
 * and one PE pair.
 * Checks the content of sr with check_processAIFilesResults, then checks the 3
 * backends returned in array_be: slot 0 for the single-end file (f_id 1, no
 * auxiliary processor), slot 1 for PE1 (f_id 2, auxiliary processor is slot 2)
 * and slot 2 for PE2 (f_id 3).
 */
void test_processInputFiles() {
    char * fq_1_test=(char *) "../test/data/unit/test_PE1.fq";
    char * fq_2_test=(char *) "../test/data/unit/test_PE2.fq";
    char * fq_single=(char *) "../test/data/unit/test_single.fq";

    IO_fq_files s;
    s.in_fq_file=fq_single;

    vector<IO_fq_files> v_single;
    v_single.push_back(s);

    PE_files pe;
    pe.PE1.in_fq_file=fq_1_test;
    pe.PE2.in_fq_file=fq_2_test;
    vector<PE_files> v_pe;
    v_pe.push_back(pe);

    srp sr;

    // Zero-initialized so that a slot left unfilled by processInputFiles is a null
    // pointer (caught by the asserts below, harmless for delete) instead of an
    // indeterminate value.
    FqBaseBackend * array_be[k_max_input_files]={};
    processInputFiles(v_single,v_pe,array_be,&sr);

    // check that result is correct in sr.
    check_processAIFilesResults(sr);

    // check that the 3 backends are present before looking inside them.
    assert(array_be[0]!=NULL);
    assert(array_be[1]!=NULL);
    assert(array_be[2]!=NULL);

    // check that the 3 backends are correct
    // TODO see if static_cast or dynamic_cast can replace these C-style casts.
    FqMainBackend * pbe=(FqMainBackend *) array_be[0];
    FqAuxBackend * pbe2=(FqAuxBackend *) array_be[2];
    FqMainBackend * pbe3=(FqMainBackend *) array_be[1];

    assert(strcmp(pbe->i_filename,fq_single)==0);
    assert(pbe->f_id==1);
    assert(pbe->p_auxFqProcessor==NULL);

    assert(pbe3->p_auxFqProcessor==pbe2);
    assert(strcmp(pbe3->i_filename,fq_1_test)==0);
    assert(pbe3->f_id==2);

    assert(strcmp(pbe2->i_filename,fq_2_test)==0);
    assert(pbe2->f_id==3);

    // The test releases the backends itself.
    // NOTE(review): deleting through FqBaseBackend* presumes a virtual destructor - confirm.
    int i;
    for (i=0;i<3;i++) delete array_be[i];

}


/*
 * Test driver: announces and runs each test in a fixed order, then prints "done".
 * The tests rely on assert, so the first failing check aborts the program.
 */
int main(int argc, char **argv) {
    struct test_step {
        const char * label;   // message printed before the test starts
        void (*run)();        // test to execute
    };

    const test_step steps[]={
        {"test for single file", test_processSingleFile},
        {"test for PE files", test_processPEFiles},
        {"test the case of records that contain @ character in quality score", test_processPEfilesWithA},
        {"test for both single and PE files", test_processAllFiles}, /* mix PE together with single; nearly as in real life.*/
        {"testing higher level function processInputFiles", test_processInputFiles}
    };
    const unsigned int nb_steps=sizeof(steps)/sizeof(steps[0]);

    for (unsigned int idx=0; idx<nb_steps; ++idx) {
        cout<<steps[idx].label<<endl;
        steps[idx].run();
    }
    cout<<"done"<<endl;
    return 0;
}