diff --git a/src/FqMainBackend.cpp b/src/FqMainBackend.cpp index a76d6a8de6ea20ce8a57be60b1a26ab0a913e529..025e505617583e25fdb20d1b9d618b3dc328004a 100644 --- a/src/FqMainBackend.cpp +++ b/src/FqMainBackend.cpp @@ -108,7 +108,8 @@ void FqMainBackend::onEndFastqRecord(T_fq_rec_info& rec_info,const T_buf_info& b nb_k_mer+=nb_k_mer_PE2; (nb_k_mer-rec_info.nb_k_mers_in_error-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read)?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); } else { - ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=1) && (nb_k_mer-rec_info.nb_k_mers_in_error>=1))?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); + // ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=1) && (nb_k_mer-rec_info.nb_k_mers_in_error>=1))?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); + ((nb_k_mer_PE2-rec_info.nb_k_mers_in_error_in_PE2>=qual_thres.min_correct_k_mers_in_read) && (nb_k_mer-rec_info.nb_k_mers_in_error>=qual_thres.min_correct_k_mers_in_read))?ref_k_dim.push_back(rp):writeToUndefFile(bufinfo); } // empty buffer keeping current record cur_fq_record[0]='\0'; diff --git a/src/ROCKparams.cpp b/src/ROCKparams.cpp index db1fb947f366da56d8cf66ba423ad35a58e08dd9..7b9b08692822ca5d33e3cf0fff349c0a187e498e 100644 --- a/src/ROCKparams.cpp +++ b/src/ROCKparams.cpp @@ -339,10 +339,10 @@ void ROCKparams::initFromMainOptsArgs(int argc,char ** argv) { default: usage(EXIT_FAILURE); break; } } - if (!PE_as_single && qual_thres.min_correct_k_mers_in_read!=1) { + /*if (!PE_as_single && qual_thres.min_correct_k_mers_in_read!=1) { cout<<"Incompatible options: when PE are processed independently, minimum number of correct k-mers in each end is 1."<<endl; usage(EXIT_FAILURE); - } + }*/ parms.filter_PE_as_single=PE_as_single; //cout<<PE_as_single<<endl; processMainArgs(optind, argc, argv,v_input_lines); diff --git a/src/unit_test_fqreader.cpp b/src/unit_test_fqreader.cpp index 536ad498cf0750335cc36fa8b9e8a4714ebe75dc..d9771e4e4828f6f6d6cdd8dd609eddfdf54ffce5 100644 --- a/src/unit_test_fqreader.cpp +++ b/src/unit_test_fqreader.cpp @@ -448,6 +448,108 @@ void test_processPE_not_as_single2() { } +void test_processPE_not_as_singleWithMQ() { + char * fq_1_test=(char *) "../test/data/unit/fake_PE1.fq"; + char * fq_2_test=(char *) "../test/data/unit/fake_PE2.fq"; + + char * fq_1_test_undef=(char *) "../test/data/unit/fake_PE1.undef.fastq"; + char * fq_2_test_undef=(char *) "../test/data/unit/fake_PE2.undef.fastq"; + + unsigned char f_id1=1; + unsigned char f_id2=2; + srp sr; + + FasqQualThreshold qual_thres; + qual_thres.k=30; + qual_thres.min_correct_k_mers_in_read=64; + qual_thres.nucl_score_threshold=10+k_phred_32; + + FqBaseBackend::setQualThreshold(qual_thres); + FqMainBackend::setTreatPEAsSingle(0); + processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,fq_1_test_undef,fq_2_test_undef,1000); + + srp::reverse_iterator rit; + i_dim::iterator it_offs; + k_dim::iterator it_struct; + int cnt_read=0; + + for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). + //unsigned long score=rit->first; + for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { + for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { + cnt_read++; + } + } + } + assert(cnt_read==1); + struct stat fileStat; + stat(fq_1_test_undef, &fileStat); + assert(fileStat.st_size==0); + stat(fq_2_test_undef, &fileStat); + assert(fileStat.st_size==0); + sr.clear(); + + qual_thres.k=30; + qual_thres.min_correct_k_mers_in_read=45; + qual_thres.nucl_score_threshold=15+k_phred_32; + + FqBaseBackend::setQualThreshold(qual_thres); + FqMainBackend::setTreatPEAsSingle(0); + processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,fq_1_test_undef,fq_2_test_undef,1000); + + + + cnt_read=0; + + for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). + //unsigned long score=rit->first; + for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { + for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { + cnt_read++; + } + } + } + assert(cnt_read==0); + + sr.clear(); + + assert(compareFilesLileByLine(fq_1_test_undef,fq_1_test)==0); + assert(compareFilesLileByLine(fq_2_test_undef,fq_2_test)==0); + + qual_thres.k=30; + qual_thres.min_correct_k_mers_in_read=13; + qual_thres.nucl_score_threshold=18+k_phred_32; + + FqBaseBackend::setQualThreshold(qual_thres); + FqMainBackend::setTreatPEAsSingle(0); + processPEFiles(fq_1_test, f_id1,fq_2_test, f_id2,&sr,fq_1_test_undef,fq_2_test_undef,1000); + + + cnt_read=0; + + for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). + //unsigned long score=rit->first; + for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { + for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { + cnt_read++; + } + } + } + assert(cnt_read==1); + sr.clear(); + + + stat(fq_1_test_undef, &fileStat); + assert(fileStat.st_size==0); + stat(fq_2_test_undef, &fileStat); + assert(fileStat.st_size==0); + + + assert(remove((char *) fq_1_test_undef)==0); + assert(remove((char *) fq_2_test_undef)==0); + +} + void test_processInputFiles() { char * fq_1_test=(char *) "../test/data/unit/test_PE1.fq"; @@ -722,5 +824,7 @@ int main(int argc, char **argv) { test_processPE_not_as_single(); cout<<"test processing PE files with lots of errors in PE2; nucleotide quality threshold and PE not treated as single option."<<endl; test_processPE_not_as_single2(); + cout<<"test processing PE files with PE not treated as single; quality score threshold and minimum number of valid k-mer."<<endl; + test_processPE_not_as_singleWithMQ(); cout<<"done"<<endl; } diff --git a/test/Makefile.am b/test/Makefile.am index 029c5f5c32360ecd57a73a303c38f4e4e36ac03a..cbaa00857f238d2e60437735cc67c7625da61dc4 100755 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -17,8 +17,9 @@ data/unit/09-4607_S43_R2_big.fastq data/unit/thres_100_09-4607_S43_R1.undef.fast data/unit/thres_100_09-4607_S43_R2.undef.fastq data/unit/thres_180_09-4607_S43_R1.undef.fastq \ data/unit/thres_180_09-4607_S43_R2.undef.fastq data/fastq.raw/klebsiella_5_1_bad_scores.fq \ data/fastq.raw/klebsiella_5_2_bad_scores.fq data/iofiles.args/output_files_2_PE_separated.txt \ -data/unit/test_PE1_PE_not_as_single.fq data/unit/test_PE2_PE_not_as_single.fq data/unit/expected_PE1_PE_not_as_single.undef.fastq\ -data/unit/expected_PE2_PE_not_as_single.undef.fastq \ -data/unit/test_PE1_PE_not_as_single_pathological.fq data/unit/test_PE2_PE_not_as_single_pathological.fq +data/unit/test_PE1_PE_not_as_single.fq data/unit/test_PE2_PE_not_as_single.fq \ +data/unit/expected_PE1_PE_not_as_single.undef.fastq data/unit/expected_PE2_PE_not_as_single.undef.fastq \ +data/unit/test_PE1_PE_not_as_single_pathological.fq data/unit/test_PE2_PE_not_as_single_pathological.fq \ +data/unit/fake_PE1.fq data/unit/fake_PE2.fq diff --git a/test/data/unit/fake_PE1.fq b/test/data/unit/fake_PE1.fq new file mode 100644 index 0000000000000000000000000000000000000000..3e855a2da1190222ddccbfd0e0e1aa30545764a7 --- /dev/null +++ b/test/data/unit/fake_PE1.fq @@ -0,0 +1,4 @@ +@SRR1222430.1378154 1378154 length=94 +CAGGAACCGCCTTCTGGTGATTTGCAAGAACGCAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTT ++SRR1222430.1378154 1378154 length=94 +CCCCDFFDCDDCGGGGGGGGGGHHHHHHHHHGEGGCHFGEBFEHHHGGEGGHHHHHHGHHHGHGGEGFGGGE/FGHGGHHHHGGGHGHGGHHH diff --git a/test/data/unit/fake_PE2.fq b/test/data/unit/fake_PE2.fq new file mode 100644 index 0000000000000000000000000000000000000000..3e855a2da1190222ddccbfd0e0e1aa30545764a7 --- /dev/null +++ b/test/data/unit/fake_PE2.fq @@ -0,0 +1,4 @@ +@SRR1222430.1378154 1378154 length=94 +CAGGAACCGCCTTCTGGTGATTTGCAAGAACGCAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTT ++SRR1222430.1378154 1378154 length=94 +CCCCDFFDCDDCGGGGGGGGGGHHHHHHHHHGEGGCHFGEBFEHHHGGEGGHHHHHHGHHHGHGGEGFGGGE/FGHGGHHHHGGGHGHGGHHH diff --git a/test/rock.sh b/test/rock.sh index 59a3ba5a4b0d5890b98d8d33ba99242702a1b76a..7c30d3ee38072b11103beed10d965345d7f368dd 100755 --- a/test/rock.sh +++ b/test/rock.sh @@ -65,7 +65,7 @@ echo "doing more checks on options and error messages" ../src/rock -C 500 -c 400 -k 10 -l 12 -n 85000000 -p -i ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100.txt -o ${srcdir}/data/iofiles.args/extract_klebsiella_long_reads_100_filtered.txt|grep "options are mutually exclusive">/dev/null || exit 14 ../src/rock -C 500 -c 400 -k 10 -q 3 -m 0 -p -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "minimum number of valid k-mer for keeping a read must be an integer">/dev/null || exit 15 ../src/rock -C 500 -c 400 -k 10 -q -1 -m 2 -p -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "q must be">/dev/null || exit 16 -../src/rock -C 500 -c 400 -k 10 -q 2 -m 2 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "Incompatible options">/dev/null || exit 161 +#../src/rock -C 500 -c 400 -k 10 -q 2 -m 2 -i ${srcdir}/test/data/iofiles.args/extract_klebsiella_long_reads_100.txt|grep "Incompatible options">/dev/null || exit 161 # here check that we have enough memory for running the tests. ../src/unit_test_cms CHECK_MEM diff --git a/test/rock_mem.sh b/test/rock_mem.sh index 02455ecef2ee0ae16c43d99d1adc9b9d275f9e93..e21876fd4db67926643a9fec03757fd2e4a73907 100755 --- a/test/rock_mem.sh +++ b/test/rock_mem.sh @@ -255,11 +255,86 @@ rm -f tmp/klebsiella_5_1_2_qual_thres.fq rm -f tmp/klebsiella_5_2_2_qual_thres.fq +echo " " +echo "##################################################################################" +echo "testing ROCK with a quality score threshold for nucleotides, PE processed separately and -m option" +../src/rock -C 100 -c 1 -l 2 -q 2 -m 2 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq || exit 202 + +[ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 20200 +[ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 20201 + +ret1=`cat tmp/klebsiella_5_1_bad_scores.undefined.fq|wc -l` +ret2=`cat tmp/klebsiella_5_2_bad_scores.undefined.fq|wc -l` + +test $ret1=8 || exit 202100 +test $ret1=8 || exit 202101 + +ret1=`grep -c SRR1222430.1 tmp/klebsiella_5_1_bad_scores.undefined.fq` +ret2=`grep -c SRR1222430.1 tmp/klebsiella_5_2_bad_scores.undefined.fq` + +test $ret1=2 || exit 202200 +test $ret1=2 || exit 202201 + +ret1=`grep -c SRR1222430.5 tmp/klebsiella_5_1_bad_scores.undefined.fq` +ret2=`grep -c SRR1222430.5 tmp/klebsiella_5_2_bad_scores.undefined.fq` + +test $ret1=2 || exit 202300 +test $ret1=2 || exit 202301 + +ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_bad_scores.undefined.fq` +ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_bad_scores.undefined.fq` + +test $ret1=0 || exit 202400 +test $ret1=0 || exit 202401 + +ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_bad_scores.undefined.fq` +ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_bad_scores.undefined.fq` + +test $ret1=0 || exit 202500 +test $ret1=0 || exit 202501 + +ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_bad_scores.undefined.fq` +ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_bad_scores.undefined.fq` + +test $ret1=0 || exit 202600 +test $ret1=0 || exit 202601 + +ret1=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` +ret2=`cat tmp/klebsiella_5_1_2_qual_thres.fq|wc -l` + +test $ret1=12 || exit 202700 +test $ret1=12 || exit 202701 + +ret1=`grep -c SRR1222430.2 tmp/klebsiella_5_1_2_qual_thres.fq` +ret2=`grep -c SRR1222430.2 tmp/klebsiella_5_2_2_qual_thres.fq` + +test $ret1=2 || exit 202800 +test $ret1=2 || exit 202801 + +ret1=`grep -c SRR1222430.3 tmp/klebsiella_5_1_2_qual_thres.fq` +ret2=`grep -c SRR1222430.3 tmp/klebsiella_5_2_2_qual_thres.fq` + +test $ret1=2 || exit 202802 +test $ret1=2 || exit 202803 + +ret1=`grep -c SRR1222430.4 tmp/klebsiella_5_1_2_qual_thres.fq` +ret2=`grep -c SRR1222430.4 tmp/klebsiella_5_2_2_qual_thres.fq` + +test $ret1=2 || exit 202804 +test $ret1=2 || exit 202805 + +rm -f tmp/klebsiella_5_1_bad_scores.undefined.fq +rm -f tmp/klebsiella_5_2_bad_scores.undefined.fq +rm -f tmp/klebsiella_5_1_2_qual_thres.fq +rm -f tmp/klebsiella_5_2_2_qual_thres.fq + + + echo " " echo "##################################################################################" echo "testing ROCK with no quality score threshold for nucleotides and PE processed separately" -../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq || exit 201 +../src/rock -C 100 -c 1 -l 2 -o ${srcdir}/data/iofiles.args/output_files_2_PE_separated.txt ${srcdir}/data/fastq.raw/klebsiella_5_1_bad_scores.fq,${srcdir}/data/fastq.raw/klebsiella_5_2_bad_scores.fq || exit 203 [ -f "tmp/klebsiella_5_1_bad_scores.undefined.fq" ] || exit 30100 [ -f "tmp/klebsiella_5_2_bad_scores.undefined.fq" ] || exit 30101