Select Git revision
ReadProcessor.cpp
ReadProcessor.cpp 3.21 KiB
/*
* ReadProcessor.cpp
*
* Created on: Feb 5, 2016
* Author: vlegrand
*/
#include <stdlib.h>
#include "ReadProcessor.h"
/*
inline void ReadProcessor::init_nucleo_rev_cpl () {
nucleo_rev_cpl['A']='T';
nucleo_rev_cpl['T']='A';
nucleo_rev_cpl['C']='G';
nucleo_rev_cpl['G']='C';
nucleo_rev_cpl['N']='N';
}*/
/*
inline void ReadProcessor::init_mask(int k) {
int nb_bits=2*k;
mask_kMer=pow(2,nb_bits);
mask_kMer=mask_kMer-1;
}*/
/*
 * Converts one nucleotide character into its 2-bit numeric code:
 *   'A' -> 0, 'C' -> 1, 'G' -> 2, 'T' -> 3.
 * 'N' is given the same code as 'A' (0).
 *
 * Any other character makes the function throw the integer -1.
 */
inline unsigned long ReadProcessor::nucleoToNumber(char s) {
    if (s == 'A' || s == 'N') {
        return 0;
    }
    if (s == 'C') {
        return 1;
    }
    if (s == 'G') {
        return 2;
    }
    if (s == 'T') {
        return 3;
    }
    // A plain integer is thrown for the moment: an exception object may not be the
    // best choice in terms of performance.
    // TODO Benchmark this inside a try/catch statement to see whether try/catch + exception really costs that much.
    throw -1;
}
/*
 * Encodes a k-mer as an integer, using 2 bits per nucleotide (see nucleoToNumber).
 *
 * k_m    : pointer to the first character of the k-mer.
 * p_prev : NULL to encode the k characters k_m[0..k-1] from scratch (k_m[0] ends up
 *          in the highest 2-bit slot); otherwise a pointer to an already computed
 *          value, which is updated incrementally: it is shifted left by one
 *          nucleotide, truncated with mask_kMer, and the code of k_m[k-1] is put in
 *          the lowest 2 bits. getKMerNumbers passes the value of the preceding k-mer.
 *
 * Returns the resulting number. The integer thrown by nucleoToNumber on an unknown
 * character is deliberately not caught here (the cost of try/catch has not been
 * measured yet), so it propagates to the caller.
 */
inline unsigned long ReadProcessor::kMerToNumber(char * k_m,unsigned long * p_prev) {
    if (p_prev != NULL) {
        // Incremental path: only the last nucleotide of the k-mer has to be converted.
        const unsigned long shifted = (*p_prev << 2) & mask_kMer;
        return shifted | nucleoToNumber(k_m[k-1]);
    }

    // First k-mer conversion: accumulate every nucleotide, most significant first.
    unsigned long code = 0;
    for (int pos = 0; pos < k; ++pos) {
        code = (code << 2) | nucleoToNumber(k_m[pos]);
    }
    return code;
}
/*
 * Looks up s in nucleo_rev_cpl, converts the result with nucleoToNumber and moves
 * the 2-bit code to the i-th 2-bit slot (i is 1-based: i == 1 means no shift).
 *
 * s : nucleotide character used as index into nucleo_rev_cpl.
 *     NOTE(review): nucleo_rev_cpl is presumably the complement table
 *     (A<->T, C<->G, N->N); its initialisation is not visible here - confirm.
 * i : 1-based slot; callers in this file always pass a value in [1, k].
 *
 * Returns the shifted code. The integer thrown by nucleoToNumber propagates.
 */
inline unsigned long ReadProcessor::nucleoToNumberReverse(char s,int i) {
    const char complement = nucleo_rev_cpl[s];
    const unsigned long code = nucleoToNumber(complement);
    return code << (2 * (i - 1));
}
/*
 * Encodes a k-mer through nucleoToNumberReverse: the character at position p
 * (0-based) contributes its looked-up code in 2-bit slot p, so k_m[k-1] ends up in
 * the highest slot and k_m[0] in the lowest one.
 *
 * k_m    : pointer to the first character of the k-mer.
 * p_prev : NULL to encode the k characters k_m[0..k-1] from scratch; otherwise a
 *          pointer to an already computed value, which is updated incrementally:
 *          it is shifted right by one nucleotide and the code of k_m[k-1] is put in
 *          the highest slot. getKMerNumbers passes the value of the preceding k-mer.
 *
 * Returns the resulting number. The integer thrown by nucleoToNumber on an unknown
 * character is not caught here.
 */
inline unsigned long ReadProcessor::kMerToNumberReverse(char * k_m,unsigned long * p_prev) {
    if (p_prev != NULL) {
        // Incremental path: drop the lowest slot, then add the new last nucleotide on top.
        const unsigned long remaining = *p_prev >> 2;
        return remaining | nucleoToNumberReverse(k_m[k-1], k);
    }

    // First k-mer conversion: every code is already shifted to its final slot,
    // so the contributions only have to be OR-ed together.
    unsigned long code = 0;
    for (int pos = k; pos > 0; --pos) {
        code |= nucleoToNumberReverse(k_m[pos-1], pos);
    }
    return code;
}
/*
 * Computes the numeric encoding of every k-mer of a read and appends the results to
 * my_vect.
 *
 * dnaStr  : the read; characters dnaStr[0..l-1] are accessed.
 * l       : number of characters of the read. When l < k there is no k-mer and
 *           nothing is appended.
 * my_vect : output. For each of the l-k+1 k-mers, taken from left to right, two
 *           values are appended: the result of kMerToNumber followed by the result
 *           of kMerToNumberReverse. Existing content of my_vect is kept.
 *
 * The first k-mer is converted from scratch; each following one is derived from the
 * value computed for the preceding k-mer.
 *
 * The integer thrown by nucleoToNumber on an unknown character is not caught here;
 * values appended before the throw remain in my_vect.
 *
 * See simple_test.cpp and results: the benchmark showed that std::vector is very
 * slightly faster than a C array and doesn't require more memory in our case, so it
 * is used since it makes the code simpler.
 */
void ReadProcessor::getKMerNumbers(char * dnaStr,int l,std::vector<unsigned long>& my_vect) {
    unsigned long num_fwd = 0;
    unsigned long num_rev = 0;
    // NULL means "no previous value yet": the first k-mer is converted from scratch.
    unsigned long * p_prev = NULL;
    unsigned long * p_prev_rev = NULL;
    char * p_char = dnaStr;
    int nb_k_m = l-k+1;
    for (int i = 0; i < nb_k_m; i++) {
        num_fwd = kMerToNumber(p_char, p_prev);
        my_vect.push_back(num_fwd);
        num_rev = kMerToNumberReverse(p_char, p_prev_rev);
        my_vect.push_back(num_rev);
        p_char++;
        // From now on, reuse the values just computed for the incremental update.
        p_prev = &num_fwd;
        p_prev_rev = &num_rev;
    }
}