Skip to content
Snippets Groups Projects
Commit f36873f5 authored by Veronique Legrand's avatar Veronique Legrand
Browse files

fixed unit test

parent 4e3ecb89
No related branches found
No related tags found
No related merge requests found
Pipeline #63714 passed
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<project>
<configuration id="cdt.managedbuild.toolchain.gnu.macosx.base.481822517" name="Default">
<extension point="org.eclipse.cdt.core.LanguageSettingsProvider">
<provider copy-of="extension" id="org.eclipse.cdt.ui.UserLanguageSettingsProvider"/>
<provider-reference id="org.eclipse.cdt.core.ReferencedProjectsLanguageSettingsProvider" ref="shared-provider"/>
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuildCommandParser" id="org.eclipse.cdt.managedbuilder.core.GCCBuildCommandParser" keep-relative-paths="false" name="CDT GCC Build Output Parser" parameter="(g?cc)|([gc]\+\+)|(clang)" prefer-non-shared="true"/>
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="false" env-hash="-1667358530298693438" id="org.eclipse.cdt.managedbuilder.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<language-scope id="org.eclipse.cdt.core.gcc"/>
<language-scope id="org.eclipse.cdt.core.g++"/>
</provider>
<provider-reference id="org.eclipse.cdt.managedbuilder.core.MBSLanguageSettingsProvider" ref="shared-provider"/>
</extension>
</configuration>
</project>
\ No newline at end of file
......@@ -25,7 +25,7 @@
#include "ReadProcessor.h"
#include <iostream>
//! Tried to inline this bur slightly slower. Stays here.
//! Tried to inline this but slightly slower. Stays here.
unsigned long ReadProcessor::nucleoToNumberWrap(char s) {
(s=='N')?n=k:n;
n--;
......@@ -36,7 +36,7 @@ void ReadProcessor::kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers) {
unsigned long c,crev;
if (numbers.first) { // first k_mer conversion
int i;
int j=k;
int j=k;
for (i=0;i<k;i++) {
c=nucleoToNumberWrap(k_m[i]); // do not catch exception for the moment. If nucleoToNumber throws -1 and there is no catch clause for it, the terminate() function will be called automatically. Anyway, there's not munch we can do if it returns -1.
numbers.nbr=numbers.nbr<<2;
......@@ -45,7 +45,7 @@ void ReadProcessor::kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers) {
numbers.nbr_rev=numbers.nbr_rev|crev;
j--;
}
numbers.first=0;
numbers.first=0;
} else {
c=nucleoToNumberWrap(k_m[k-1]);
numbers.nbr=numbers.nbr<<2;
......@@ -62,6 +62,7 @@ void ReadProcessor::kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers) {
void ReadProcessor::getKMerNumbers(char * dnaStr,int l,std::vector<unsigned long>& my_vect) { // See simple_test.cpp and results. benchmark showed that std::vector is very slightly faster than C array and doesn't require more memory in our case. So, I am using it since it makes code simpler.
int i;
KmRevNumbers numbers;
initNumbers(numbers);
char * p_char=dnaStr;
int nb_k_m=l-k+1;
for (i=0; i<nb_k_m;i++) {
......
......@@ -35,12 +35,8 @@
#include <iostream>
using namespace std;
//! Gather here the numbers corresponding to a kmer and it's reverse complement once they were hashed.
typedef struct {
unsigned long nbr;
unsigned long nbr_rev;
unsigned int first; //! set this to 1 before processing the 1rst kmer of the read.
} KmRevNumbers;
class ReadProcessor {
......@@ -51,6 +47,7 @@ class ReadProcessor {
std::map<char, char> nucleo_rev_cpl;
unsigned long mask_kMer;
void init_nucleo_rev_cpl() {
nucleo_rev_cpl['A']='T';
nucleo_rev_cpl['T']='A';
......@@ -114,11 +111,24 @@ public:
init_mask(k);
}
//! Gather here the numbers corresponding to a kmer and it's reverse complement once they were hashed.
typedef struct {
unsigned long nbr;
unsigned long nbr_rev;
unsigned int first; //! set this to 1 before processing the 1rst kmer of the read.
} KmRevNumbers;
void initNumbers(KmRevNumbers& numbers) {
numbers.first=1;
numbers.nbr=0;
numbers.nbr_rev=0;
}
// Do not inline this; it is slightly slower.
// These functions are no more used. Now using the same loop to convert hash kmer and reverse complement.
/*unsigned long kMerToNumberReverse(char * k_m,unsigned long * p_prev);
unsigned long kMerToNumber(char * k_m,unsigned long * p_prev);*/
//! The 2 previous methods are now replacs by the one below.
void kMerAndRevToNumber(char * k_m,KmRevNumbers& numbers);
unsigned long nucleoToNumberWrap(char s);
......
File added
File added
File added
File added
File added
......@@ -226,7 +226,18 @@ void test_decomposeReadInkMerNums() {
decomposeReadInKMerNums(read_p,nbrKmerDecompo,k,a_seqs);
assert(nbrKmerDecompo.size()==147);
}
/*
void initNumbers(KmRevNumbers& numbers) {
numbers.first=1;
numbers.nbr=0;
numbers.nbr_rev=0;
}*/
void savNumbers(ReadProcessor::KmRevNumbers& numbers,ReadProcessor::KmRevNumbers& numbers_sav) {
numbers_sav.first=numbers.first;
numbers_sav.nbr=numbers.nbr;
numbers_sav.nbr_rev=numbers.nbr_rev;
}
/*
* Test the basic methods of the ReadProcessor object.
......@@ -244,32 +255,42 @@ void testDNAToNumberSimple() {
assert(read_p.nucleoToNumber('N')==0);
// unsigned long * p_prev=NULL;
KmRevNumbers numbers;
ReadProcessor::KmRevNumbers numbers;
read_p.initNumbers(numbers);
read_p.set_k(4);
read_p.kMerAndRevToNumber((char *) "AAAA",numbers);
assert(numbers.nbr==0); // TODO : this method should be private in ReadProcessor. Design a friend class to test them or declare this testing function friend in ReadProcessor.
assert(numbers.nbr_rev==255);
read_p.set_k(5);
read_p.initNumbers(numbers);
read_p.kMerAndRevToNumber((char *) "AANAA",numbers);
assert(numbers.nbr==0);
assert(numbers.nbr_rev==975);
read_p.set_k(14);
read_p.initNumbers(numbers);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGG",numbers);
assert(numbers.nbr==219386);
assert(numbers.nbr_rev==84779983);
ReadProcessor::KmRevNumbers numbers_orig,numbers_w;
savNumbers(numbers,numbers_orig);
savNumbers(numbers_orig,numbers_w);
read_p.set_k(15);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGA",numbers);
assert(numbers.nbr==877544);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGN",numbers);
assert(numbers.nbr==877544);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGC",numbers);
assert(numbers.nbr==877545);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGG",numbers);
assert(numbers.nbr==877546);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGT",numbers);
assert(numbers.nbr==877547);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGA",numbers_w);
assert(numbers_w.nbr==877544);
savNumbers(numbers_orig,numbers_w);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGN",numbers_w);
assert(numbers_w.nbr==877544);
savNumbers(numbers_orig,numbers_w);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGC",numbers_w);
assert(numbers_w.nbr==877545);
savNumbers(numbers_orig,numbers_w);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGG",numbers_w);
assert(numbers_w.nbr==877546);
savNumbers(numbers_orig,numbers_w);
read_p.kMerAndRevToNumber((char *) "AANAATCCGATTGGT",numbers_w);
assert(numbers_w.nbr==877547);
read_p.set_k(1);
assert(read_p.nucleoToNumberReverse('A',1)==3);
......@@ -285,30 +306,31 @@ void testDNAToNumberSimple() {
assert(read_p.nucleoToNumberReverse('T',20)==0);
assert(read_p.nucleoToNumberReverse('N',20)==0);
numbers.nbr=84779983;
numbers.nbr_rev=84779983;
read_p.set_k(14);
read_p.kMerAndRevToNumber((char *)"ANAATCCGATTGGA",numbers);
assert(numbers.nbr_rev==222521587);
numbers.nbr=84779983;
numbers.nbr_rev=84779983;
read_p.kMerAndRevToNumber((char *)"ANAATCCGATTGGN",numbers);
assert(numbers.nbr_rev==21194995);
numbers.nbr=84779983;
numbers.nbr_rev=84779983;
read_p.kMerAndRevToNumber((char *)"ANAATCCGATTGGT",numbers);
assert(numbers.nbr_rev==21194995);
numbers.nbr=84779983;
numbers.nbr_rev=84779983;
read_p.kMerAndRevToNumber((char *)"ANAATCCGATTGGC",numbers);
assert(numbers.nbr_rev==155412723);
numbers.nbr=84779983;
numbers.nbr_rev=84779983;
read_p.kMerAndRevToNumber((char *)"ANAATCCGATTGGG",numbers);
assert(numbers.nbr_rev==88303859);
}
/*
* Test the ReadProcessor method that will be called by the main program : the one who takes as input a dna string and returns
* a serie of numbers all corresponding to its k-mer composition + reverse complement.
*/
void testDNAToNumberMoreComplex() { // TODO in main, check that k<=32.
// from a dna string a found in fq files, return its k-mer decomposition; k-mers are numbers.
char dnaStr[]="TTTTTAGGTGCTACCATAACACCAACTGTTTTCACCATAATTTTAAAATCAAGCATTAGAGACGCTTCTCTAATGTATTGCAAATCTAGTTCTACCATTTGATGAAAATCTAATTCATTTCTTCCACTAACCTGCCATAATCCAGTACAACCTGGTATAACGGTCAAACGCTTTTTATCATAGGAACTGTATTCTCCTACCTCACGTGGCAAAGGAGGTCTTGGACCAACAATTGCCATGTCTCCTTTAACCACATTCCAAAGCTGTGGTA";
......@@ -325,35 +347,44 @@ void testDNAToNumberMoreComplex() { // TODO in main, check that k<=32.
assert(nbrKmerDecompo.size()==nb_expected_k_mers); // for each k_mer, we also expect to have a number for its reverse complement.
int cnt_k_mer=0;
unsigned long num,num2;
ReadProcessor::KmRevNumbers numbers,numbers_start;
read_p.initNumbers(numbers);
read_p.initNumbers(numbers_start);
read_p.kMerAndRevToNumber((char *) "TTTTTAGGTGCTACCATAACACCAACTGTT",numbers);
// 1rst k-mer is : TTTTTAGGTGCTACCATAACACCAACTGTT.
// its reverse complement is : AACAGTTGGTGTTATGGTAGCACCTAAAAA
for (it=nbrKmerDecompo.begin();it!=nbrKmerDecompo.end();it++) {
cnt_k_mer++;
// 1rst k-mer is : TTTTTAGGTGCTACCATAACACCAACTGTT.
// its reverse complement is : AACAGTTGGTGTTATGGTAGCACCTAAAAA
if (cnt_k_mer==1) {
assert(*it==1151987406330741231);
num=*it;
assert(*it==max(numbers.nbr,numbers.nbr_rev));
//num=*it;
}
if (cnt_k_mer==2) {
// 2nd k-mer is : TTTTAGGTGCTACCATAACACCAACTGTTT
num2=read_p.kMerToNumber((char *) "TTTTAGGTGCTACCATAACACCAACTGTTT",NULL);
assert(*it==num2);
unsigned long num3=read_p.kMerToNumber((char *) "TTTTAGGTGCTACCATAACACCAACTGTTT",&num);
assert(num3==num2);
read_p.kMerAndRevToNumber((char *) "TTTTAGGTGCTACCATAACACCAACTGTTT",numbers_start);
cout<<numbers_start.nbr<<endl;
cout<<numbers_start.nbr_rev<<endl;
cout<<*it<<endl;
assert(*it==numbers_start.nbr);
read_p.kMerAndRevToNumber((char *) "TTTTAGGTGCTACCATAACACCAACTGTTT",numbers);
assert(numbers.nbr==numbers_start.nbr);
}
if (cnt_k_mer==3) {
num=read_p.kMerToNumber((char *) "TTTAGGTGCTACCATAACACCAACTGTTTT",NULL);
unsigned long num_rev=read_p.kMerToNumber((char *) "AAAACAGTTGGTGTTATGGTAGCACCTAAA",NULL);
assert(max(num,num_rev)==*it);
read_p.initNumbers(numbers);
read_p.kMerAndRevToNumber((char *) "TTTAGGTGCTACCATAACACCAACTGTTTT",numbers);
assert(max(numbers.nbr,numbers.nbr_rev)==*it);
}
if (cnt_k_mer==4) {
num=read_p.kMerToNumber((char *) "TTAGGTGCTACCATAACACCAACTGTTTTC",NULL);
assert(num==*it);
read_p.initNumbers(numbers);
read_p.kMerAndRevToNumber((char *) "TTAGGTGCTACCATAACACCAACTGTTTTC",numbers);
assert(numbers.nbr==*it);
}
// k-mer number 10 is GCTACCATAACACCAACTGTTTTCACCATA
// its reverse complement is : TATGGTGAAAACAGTTGGTGTTATGGTAGC
if (cnt_k_mer==10) {
num=read_p.kMerToNumber((char *) "GCTACCATAACACCAACTGTTTTCACCATA",NULL);
//num=read_p.kMerToNumber((char *) "GCTACCATAACACCAACTGTTTTCACCATA",NULL);
assert(*it==max(704021989794238796,930978566989888201));
}
/*if (cnt_k_mer==20) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment