diff --git a/Dummy_rock/Dummy_rock/Dummy_rock.1 b/Dummy_rock/Dummy_rock/Dummy_rock.1 deleted file mode 100644 index 50ca864869ebb64a64050cf295c4595a327512c7..0000000000000000000000000000000000000000 --- a/Dummy_rock/Dummy_rock/Dummy_rock.1 +++ /dev/null @@ -1,79 +0,0 @@ -.\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples. -.\"See Also: -.\"man mdoc.samples for a complete listing of options -.\"man mdoc for the short list of editing options -.\"/usr/share/misc/mdoc.template -.Dd 07/12/15 \" DATE -.Dt Dummy_rock 1 \" Program name and manual section number -.Os Darwin -.Sh NAME \" Section Header - required - don't modify -.Nm Dummy_rock, -.\" The following lines are read in generating the apropos(man -k) database. Use only key -.\" words here as the database is built based on the words here and in the .ND line. -.Nm Other_name_for_same_program(), -.Nm Yet another name for the same program. -.\" Use .Nm macro to designate other names for the documented program. -.Nd This line parsed for whatis database. -.Sh SYNOPSIS \" Section Header - required - don't modify -.Nm -.Op Fl abcd \" [-abcd] -.Op Fl a Ar path \" [-a path] -.Op Ar file \" [file] -.Op Ar \" [file ...] -.Ar arg0 \" Underlined argument - use .Ar anywhere to underline -arg2 ... \" Arguments -.Sh DESCRIPTION \" Section Header - required - don't modify -Use the .Nm macro to refer to your program throughout the man page like such: -.Nm -Underlining is accomplished with the .Ar macro like this: -.Ar underlined text . -.Pp \" Inserts a space -A list of items with descriptions: -.Bl -tag -width -indent \" Begins a tagged list -.It item a \" Each item preceded by .It macro -Description of item a -.It item b -Description of item b -.El \" Ends the list -.Pp -A list of flags and their descriptions: -.Bl -tag -width -indent \" Differs from above in tag removed -.It Fl a \"-a flag as a list item -Description of -a flag -.It Fl b -Description of -b flag -.El \" Ends the list -.Pp -.\" .Sh ENVIRONMENT \" May not be needed -.\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 -.\" .It Ev ENV_VAR_1 -.\" Description of ENV_VAR_1 -.\" .It Ev ENV_VAR_2 -.\" Description of ENV_VAR_2 -.\" .El -.Sh FILES \" File used or created by the topic of the man page -.Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact -.It Pa /usr/share/file_name -FILE_1 description -.It Pa /Users/joeuser/Library/really_long_file_name -FILE_2 description -.El \" Ends the list -.\" .Sh DIAGNOSTICS \" May not be needed -.\" .Bl -diag -.\" .It Diagnostic Tag -.\" Diagnostic informtion here. -.\" .It Diagnostic Tag -.\" Diagnostic informtion here. -.\" .El -.Sh SEE ALSO -.\" List links in ascending order by section, alphabetically within a section. -.\" Please do not reference files that do not exist without filing a bug report -.Xr a 1 , -.Xr b 1 , -.Xr c 1 , -.Xr a 2 , -.Xr b 2 , -.Xr a 3 , -.Xr b 3 -.\" .Sh BUGS \" Document known, unremedied bugs -.\" .Sh HISTORY \" Document history if command behaves in a unique manner \ No newline at end of file diff --git a/Dummy_rock/Dummy_rock/main.c b/Dummy_rock/Dummy_rock/main.c deleted file mode 100644 index a25cf869e41f0b5901fe298e5581b1a46d394acc..0000000000000000000000000000000000000000 --- a/Dummy_rock/Dummy_rock/main.c +++ /dev/null @@ -1,18 +0,0 @@ -// -// main.c -// Dummy_rock -// -// Created by vlegrand on 07/12/15. -// Copyright (c) 2015 vlegrand. All rights reserved. -// - -#include <stdio.h> - -int main(int argc, const char * argv[]) -{ - - // insert code here... - printf("Hello, World!\n"); - return 0; -} - diff --git a/Dummy_rock/Dummy_rock/main.cpp b/Dummy_rock/Dummy_rock/main.cpp deleted file mode 100644 index 2f61be73d3feeff1438f68a971333f202ba7dd2c..0000000000000000000000000000000000000000 --- a/Dummy_rock/Dummy_rock/main.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// main.cpp -// Dummy_rock -// -// Created by vlegrand on 07/12/15. -// Copyright (c) 2015 vlegrand. All rights reserved. -// - -#include <iostream> - -int main(int argc, const char * argv[]) -{ - - // insert code here... - std::cout << "Hello, World!\n"; - return 0; -} - diff --git a/configure.ac b/configure.ac index db70897adab983dbf81f31ee6afd3ca02c929871..f810bdd7b22c2081017c2bb3038b5702f6fd656e 100644 --- a/configure.ac +++ b/configure.ac @@ -7,7 +7,11 @@ AC_INIT(rock, 1.0) AC_CANONICAL_SYSTEM AM_INIT_AUTOMAKE() -AC_PROG_CC +# Checks for programs. +AC_PROG_CXX +AC_PROG_RANLIB +# AC_CHECK_PROG(POD2MAN, pod2man, pod2man, :) + AC_CONFIG_FILES(Makefile src/Makefile) AC_OUTPUT diff --git a/data/test_single.fq b/data/test_single.fq index c457b1f04549d4b4539d7b97db64ea8d72ae9db0..8416483906c3c0546c68766ea0112d95bd332449 100644 --- a/data/test_single.fq +++ b/data/test_single.fq @@ -6,3 +6,19 @@ AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE CATGAAGGAACGCTTAGTCCAGTTTTATACGGACATGAACATTGATGGGAACTTTATTTCGCTTGGTAATAATACATGGGGACGTCGAGCATGGTATCCAATTGATGCGATCGATGAAGAAGTTCAAACACACTCAGCTCCGAAGAAAAAA + AAAAAEEEEEEEEEEEEEEEEEEEEEEE6EEEEEEEEEEEEEEEEEEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/EEEEAEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEE//EEEEEEEEEEEEEEAEEEEEEEA +@NS500443:42:H3MH2AFXX:1:11101:3205:10492/1 +ATTAGGCACATTGTTTCCTTCAAGTGCGGAAATTGGAATGATTTCTGCAAAATCAAGTTTTTCTCTATAGGTTTCTATTAGCTGGATAAGTTCATCTGGACTGACTAAATCAATTTTATTGATCAATAGAAAAACAGGTGTATCCACTTGT ++ +AAAAAEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEEEEEEAEEEEEEEAEEEEEEEEEEAEEEEEEEEEEEAEEEEEEEEEEE6AAEEEEEEAEEEE<EEAAEEAEE<EEAE/<EEA/EEEEEEAAEEEE/EEEA<EE +@NS500443:42:H3MH2AFXX:1:11101:11822:11415/1 +ATATTACGAACTATTTTTTTGCACGAACTGTCCAACACATTGAGGAAACGCTGTTTAAAAATGGCTATTCCACCATTATTTGTAACACAGATCGAAGTCTCGAAAAGGAAAAAGCTTATCTTGAAATGTTGAAGGCGAAAATGATCGACGG ++ +AAA/AEEEEEEEEEEEEEEEEEEEEE6EEEEEEEAEEEEEEEEEEEE6EEEEEEEEEEEEEEEEEEEEE//AE/E<EEEEEEEEAE/EEEEEEEEEEEEEEEEEE<EEEEEEEEAEEAEEE<<EEEAAEEEEEAAAEEEEEEEEEAAA6EE +@NS500443:42:H3MH2AFXX:1:11101:17702:11983/1 +TTGGTAGACCGAGTGATTCCGATGTTGCCGCATAAACTTTCAAATGGAATTTGTTCGCTGAATCCGCAAGTGGATCGCTTTACGCTTAGCTGTGTGATGGAAATTGATGCGGACGGGCAAGTCGTGAATCACGAGATTTTTGAAAGTGTGA ++ +AAAAAEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEE +@NS500443:42:H3MH2AFXX:1:11101:11233:12467/1 +GTATTTGCGGCATGTGAAAGAGCAAATGGGCGATTTGCCAGTTGCGATTGAATTTCGGAATAGCAGCTGGTATAGTGATGCCAATTATGAAAAAACGCTAGCGCTACTGACTGAACTTGGATTCATTCACGTGGTTGTCGATGAGCCGCAA ++ +AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAE<<AAEA diff --git a/rock-doc/main.c b/rock-doc/main.c deleted file mode 100644 index 582b52c40c21d7cfb9d0883e75c808f105e08154..0000000000000000000000000000000000000000 --- a/rock-doc/main.c +++ /dev/null @@ -1,18 +0,0 @@ -// -// main.c -// rock-doc -// -// Created by vlegrand on 07/12/15. -// Copyright (c) 2015 vlegrand. All rights reserved. -// - -#include <stdio.h> - -int main(int argc, const char * argv[]) -{ - - // insert code here... - printf("Hello, World!\n"); - return 0; -} - diff --git a/rock-doc/rock_doc.1 b/rock-doc/rock_doc.1 deleted file mode 100644 index f00fd075e996374307e295e5025fc854d48919d5..0000000000000000000000000000000000000000 --- a/rock-doc/rock_doc.1 +++ /dev/null @@ -1,79 +0,0 @@ -.\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples. -.\"See Also: -.\"man mdoc.samples for a complete listing of options -.\"man mdoc for the short list of editing options -.\"/usr/share/misc/mdoc.template -.Dd 07/12/15 \" DATE -.Dt rock-doc 1 \" Program name and manual section number -.Os Darwin -.Sh NAME \" Section Header - required - don't modify -.Nm rock-doc, -.\" The following lines are read in generating the apropos(man -k) database. Use only key -.\" words here as the database is built based on the words here and in the .ND line. -.Nm Other_name_for_same_program(), -.Nm Yet another name for the same program. -.\" Use .Nm macro to designate other names for the documented program. -.Nd This line parsed for whatis database. -.Sh SYNOPSIS \" Section Header - required - don't modify -.Nm -.Op Fl abcd \" [-abcd] -.Op Fl a Ar path \" [-a path] -.Op Ar file \" [file] -.Op Ar \" [file ...] -.Ar arg0 \" Underlined argument - use .Ar anywhere to underline -arg2 ... \" Arguments -.Sh DESCRIPTION \" Section Header - required - don't modify -Use the .Nm macro to refer to your program throughout the man page like such: -.Nm -Underlining is accomplished with the .Ar macro like this: -.Ar underlined text . -.Pp \" Inserts a space -A list of items with descriptions: -.Bl -tag -width -indent \" Begins a tagged list -.It item a \" Each item preceded by .It macro -Description of item a -.It item b -Description of item b -.El \" Ends the list -.Pp -A list of flags and their descriptions: -.Bl -tag -width -indent \" Differs from above in tag removed -.It Fl a \"-a flag as a list item -Description of -a flag -.It Fl b -Description of -b flag -.El \" Ends the list -.Pp -.\" .Sh ENVIRONMENT \" May not be needed -.\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 -.\" .It Ev ENV_VAR_1 -.\" Description of ENV_VAR_1 -.\" .It Ev ENV_VAR_2 -.\" Description of ENV_VAR_2 -.\" .El -.Sh FILES \" File used or created by the topic of the man page -.Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact -.It Pa /usr/share/file_name -FILE_1 description -.It Pa /Users/joeuser/Library/really_long_file_name -FILE_2 description -.El \" Ends the list -.\" .Sh DIAGNOSTICS \" May not be needed -.\" .Bl -diag -.\" .It Diagnostic Tag -.\" Diagnostic informtion here. -.\" .It Diagnostic Tag -.\" Diagnostic informtion here. -.\" .El -.Sh SEE ALSO -.\" List links in ascending order by section, alphabetically within a section. -.\" Please do not reference files that do not exist without filing a bug report -.Xr a 1 , -.Xr b 1 , -.Xr c 1 , -.Xr a 2 , -.Xr b 2 , -.Xr a 3 , -.Xr b 3 -.\" .Sh BUGS \" Document known, unremedied bugs -.\" .Sh HISTORY \" Document history if command behaves in a unique manner \ No newline at end of file diff --git a/src/fqreader.cpp b/src/fqreader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7171d022ff9dcad39aa14dc986f0b866f824a1c4 --- /dev/null +++ b/src/fqreader.cpp @@ -0,0 +1,115 @@ +/* + * fqreader.cpp + * + * Created on: Dec 8, 2015 + * Author: vlegrand + */ +#include <stdio.h> +//#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <err.h> +#include <limits.h> + +//#include "srp.h" +#include "fqreader.h" + +const size_t bufsize=10240; +// const size_t nb_reads=6; /* for my tests for the moment */ + +rpos init_rpos(unsigned char f_id, unsigned long rstart_offset) { + rpos rp; + rp.fileid=f_id <<4; + unsigned long j,k; + k=rstart_offset%INT_MAX; + j=rstart_offset/INT_MAX; + rp.read_a1=rstart_offset-INT_MAX*j; + return rp; +} + + +/* + * Assume qualiy is phred 32. + * Main idea : a fqreader component takes as input files containing single reads or files containing PE reads and fills a structure on which other components can work on... + */ +void processBuf(char * buf,int nread,unsigned char f_id,unsigned long cur_offset, srp * io_sr) { + int cnt=0; + unsigned int s; + static unsigned int st; + static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */ + static int qual_score=0; + static unsigned long rstart_offset; + char * pchar=buf; + while (cnt<=nread-1) { + switch (*pchar){ + case k_read_id_start: { + rstart_offset=cur_offset-nread+cnt; + num_l_in_rec=1; } + break; + case k_read_qual_start: { + qual_score=1; + st=0;} + break; + case '\n': { + num_l_in_rec+=1; + if (num_l_in_rec==5) {qual_score=0;/* end of fastq record */ + // debug stuff + /*printf("\nquality score is : %d \n",st); + printf("read start_offset is %lu \n",rstart_offset);*/ + rpos rp=init_rpos(f_id,rstart_offset); + i_dim& ref_i_dim=(*io_sr)[st/K_SCORE_NORM_FACTOR]; + k_dim& ref_k_dim=ref_i_dim[rstart_offset/INT_MAX]; + ref_k_dim.push_back(rp); + } + } + break; + default: + { if (qual_score==1) { + s=(int)*pchar; + s-=k_phred_32; + st+=s; + } + } + } + pchar++; + cnt++; + } +} + +/* + * Processes 1 file containing single reads + */ +void processSingleFile(char * fq_s,unsigned char f_id, srp* io_sr) { + FILE * fp; + int st,s; + int cnt,nread; + unsigned long cur_offset,rstart_offset; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + char buf[bufsize]; + int f_single=open(fq_s,O_RDONLY,mode); + if (f_single==-1) { + err(errno,"cannot open file: %s.",fq_s); + } + + fp=fdopen(f_single,"r"); + if (fp==NULL) { + err(errno,"cannot open file: %s.",fq_s); + } + + /* for each read, we want : offset, filenb, total quality score. */ + while ((nread=read(f_single,buf,bufsize))!=0) { + // printf("%s",buf); + cur_offset=ftell(fp); + processBuf((char *)&buf,nread,f_id,cur_offset,io_sr); + } + close(f_single); + +} + +/* Processes 1 pair of files containing PE reads.*/ +void processPEFiles(char * fq_1, int f_id1,char * gq_2, int f_id2,srp * io_sr ) { + +} + + diff --git a/src/fqreader.h b/src/fqreader.h new file mode 100644 index 0000000000000000000000000000000000000000..7a319d649baec1f51d70dbedbe2a36d857d7ab3e --- /dev/null +++ b/src/fqreader.h @@ -0,0 +1,19 @@ +/* + * fqreader.h + * + * Created on: Dec 8, 2015 + * Author: vlegrand + */ +#ifndef FQREADER_H +#define FQREADER_H +#include "srp.h" + +#define k_read_id_start '@' +#define k_read_qual_start '+' +#define k_phred_32 33 + + +/*void processBuf(char * buf,int nread,int cur_offset);*/ +void processSingleFile(char *, unsigned char, srp*); +void processPEFiles(char * fq_1, int f_id1,char * gq_2, int f_id2,srp *io_sr ); +#endif diff --git a/src/rock.c b/src/rock.c deleted file mode 100644 index e8f25abe801a299792e6ccd3291de661a930db19..0000000000000000000000000000000000000000 --- a/src/rock.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - ============================================================================ - Name : rock.c - Author : Véronique Legrand - Version : - Copyright : Your copyright notice - Description : Program in C, Ansi-style - ============================================================================ - */ - -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <err.h> - -#define k_read_id_start '@' -#define k_read_qual_start '+' -#define k_phred_32 33 - -/* - * Assume qualiy is phred 32. - */ -void processBuf(char * buf,int nread,int cur_offset) { - int cnt; - int s; - static int st; - static int num_l_in_rec; /* counter to know on which line inside the fastq record we are */ - static int qual_score=0; - static int rstart_offset; - char * pchar=buf; - while (cnt<=nread-1) { - switch (*pchar){ - case k_read_id_start: { - rstart_offset=cur_offset-nread+cnt; - num_l_in_rec=1; } - break; - case k_read_qual_start: { - qual_score=1; - st=0;} - break; - case '\n': { - num_l_in_rec+=1; - if (num_l_in_rec==5) {qual_score=0;/* end of fastq record */ - // debug stuff - printf("\nquality score is : %d \n",st); - } - } - break; - default: - { if (qual_score==1) { - s=(int)*pchar; - s-=k_phred_32; - /*printf("%c %d\n",*pchar,s);*/ - st+=s; - } - } - } - pchar++; - cnt++; - - } -} - -int main(void) { - /* 1rst step : read a fastq and compute quality score for each record (PE or single) */ - char * fq_pe1="../data/LM201200065_S106_R1.TM.fq"; - char * fq_pe2="../data/LM201200065_S106_R2.TM.fq"; - char * fq_s="../data/LM201200065_S106_RS.TM.fq"; - char* fq_s_test="data/test_single.fq"; - - int st,s; - int cnt,nread; - - size_t bufsize=10240; - char buf[bufsize]; - mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; - int cur_offset,rstart_offset; - FILE * fp; - char * pchar; - - /* Let's start with the easiest case : single reads. */ - int f_single=open(fq_s_test,O_RDONLY,mode); - if (f_single==-1) { - err(errno,"cannot open file: %s.",fq_s_test); - } - - fp=fdopen(f_single,"r"); - if (fp==NULL) { - err(errno,"cannot open file: %s.",fq_s_test); - } - - /* for each read, we want : offset, filenb, total quality score. */ - while((nread=read(f_single,buf,bufsize))!=0) { - printf("%s",buf); - cur_offset=ftell(fp); - processBuf((char *)&buf,nread,cur_offset); - } - close(f_single); - return EXIT_SUCCESS; -} diff --git a/src/rock.cpp b/src/rock.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dea30b58558f02e3ce68eddadee5de87274693a6 --- /dev/null +++ b/src/rock.cpp @@ -0,0 +1,92 @@ +/* + ============================================================================ + Name : rock.c + Author : Véronique Legrand + Version : + Copyright : Your copyright notice + Description : see : https://projets.pasteur.fr/projects/homogeneisation-couverture-pre-assemblage/wiki + ============================================================================ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <err.h> +#include <string.h> + +#include "srp.h" +#include "fqreader.h" + +#define k_max_input_files 15 + + + + + +int main(void) { + /* 1rst step : read a fastq and compute quality score for each record (PE or single) */ + char * fq_pe1="../data/LM201200065_S106_R1.TM.fq"; + char * fq_pe2="../data/LM201200065_S106_R2.TM.fq"; + char * fq_s="../data/LM201200065_S106_RS.TM.fq"; + char* fq_s_test="data/test_single.fq"; + + /* All these data will be given from input. */ + char ** list_single=(char *[]){"../data/LM201200065_S106_RS.TM.fq"}; + int nb_single=1; + char ** list_pe=(char *[]) {"../data/LM201200065_S106_R1.TM.fq", + "../data/LM201200065_S106_R2.TM.fq"}; + int nb_pairs=1; + + + char ** l_files; /* array of filenames; indexed on fileid */ + int maxlen=0; + int len; + + int cnt; + int nb_all=nb_pairs*2+nb_single; /* TODO : check that nb_all doesn't exceed 15. We only have 4 bits to store fileid in memory! */ + + + for (cnt=0;cnt<nb_pairs;cnt++) { + char * pe1=list_pe[0]; + len=strlen(pe1); + if (len>maxlen) maxlen=len; + char * pe2=list_pe[1]; + len=strlen(pe2); + if (len>maxlen) maxlen=len; + list_pe+=2; + } + + for (cnt=0;cnt<nb_single;cnt++) { + char * single=list_single[0]; + len=strlen(single); + if (len>maxlen) maxlen=len; + list_single++; + } + + l_files=(char **) malloc((nb_all)*(maxlen+1)); + + for (cnt=0;cnt<nb_pairs*2;cnt++) { + if (list_pe!=NULL) strcpy(l_files[cnt],list_pe[cnt]); + } + for (cnt=nb_pairs*2;cnt<nb_all;cnt++) { + if (list_single!=NULL) strcpy(l_files[cnt],list_single[cnt-nb_pairs*2]); + } + + /* process files. Sequential processing for the moment. + * fileid is file position inside l_files array starting at 1. + */ + srp all_sr; + cnt=0; + while (cnt<2*nb_pairs) { + processPEFiles(l_files[cnt],cnt+1,l_files[cnt+1],cnt+2,&all_sr); + cnt+=2; + } + while (cnt<nb_all) { + processSingleFile(l_files[cnt],cnt+1,&all_sr); + cnt+=1; + } + + return EXIT_SUCCESS; +} diff --git a/src/srp.h b/src/srp.h new file mode 100644 index 0000000000000000000000000000000000000000..1941b1699ecf8ac32ff498e8bec75dbcb35d8860 --- /dev/null +++ b/src/srp.h @@ -0,0 +1,27 @@ +/* + * srp.h + * + * Created on: Jan 4, 2016 + * Author: vlegrand + */ + +#ifndef SRP_H_ +#define SRP_H_ + +#include <vector> +#include <map> + +#define K_SCORE_NORM_FACTOR 1000 + +typedef struct { /* Here store read offset in file whose id is fileid.*/ + unsigned char fileid; + unsigned long read_a1; + unsigned long read_a2; +}rpos; + +typedef std::vector<rpos> k_dim; +typedef std::map<unsigned long,k_dim> i_dim; +typedef std::map<unsigned long,i_dim> srp; + + +#endif /* SRP_H_ */ diff --git a/src/srp_old.h b/src/srp_old.h new file mode 100644 index 0000000000000000000000000000000000000000..89f06b6e5456711be0d4fe1f311d6eb5e2a47f64 --- /dev/null +++ b/src/srp_old.h @@ -0,0 +1,30 @@ +/* + * srp.h + * + * Created on: Dec 8, 2015 + * Author: vlegrand + * + * SRP stands for score-read position. Indeed, this structure assoiates a score with a read's position inthe input fq file. + */ + +#ifndef SRP_H_ +#define SRP_H_ + +typedef struct { /* Here store read offset in file whose id is fileid.*/ + char fileid; + unsigned long read_a1; +}rpos; + +char ** l_files; /* array of filenames; indexed on fileid */ + +typedef rpos *** srp; + +/* + * Physically, srp is : + * An array indexed on read total score (Arr1). + * Arr1 contains pointers to other other arrays (Arr2). + * Arr2 is indexed on something that is deduced from offset (read position in a file). + * Arr2 contains pointers to array of rpos structures. + */ + +#endif /* SRP_H_ */ diff --git a/src/unit_test_fqreader.cpp b/src/unit_test_fqreader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..43d3b3ad843a8a174c994d0e82104b3175ea6ba4 --- /dev/null +++ b/src/unit_test_fqreader.cpp @@ -0,0 +1,73 @@ +/* + * unit_test_fqreader.cpp + * + * Created on: Dec 8, 2015 + * Author: vlegrand + * unit testing for the fqreader component. + * Keep using assert for the moment, don't want to add a dependency on boost (or any other test framework) just for the tests. + */ +#include <stdio.h> +#include <iostream> +#include <limits.h> +#include <assert.h> +#include <stdlib.h> +#include "srp.h" +#include "fqreader.h" + +using namespace std; + +void test_processSingleFile() { + printf("MAX_UINT=%u \n",UINT_MAX); + srp sr; + unsigned char f_id=1; + processSingleFile((char *) "data/test_single.fq",f_id,&sr); + srp::reverse_iterator rit; + i_dim::iterator it_offs; + k_dim::iterator it_struct; + + int cnt_read=0; + + for (rit=sr.rbegin(); rit!=sr.rend(); ++rit) { //process map in reverse order (by decreasing scores). + // cout << "score="<<rit->first<<endl; + unsigned long score=rit->first; + assert(score==5); + for (it_offs=rit->second.begin();it_offs!=rit->second.end();it_offs++) { + unsigned long offset_quotient=it_offs->first; + assert(offset_quotient==0); + for (it_struct=it_offs->second.begin();it_struct!=it_offs->second.end();it_struct++) { + unsigned char fid_stored=it_struct->fileid; + assert(fid_stored >>4==f_id); + if (cnt_read==0) assert(it_struct->read_a1==0); + if (cnt_read==1) assert(it_struct->read_a1==350); + if (cnt_read==2) assert(it_struct->read_a1==699); + if (cnt_read==3) assert(it_struct->read_a1==1049); + if (cnt_read==4) assert(it_struct->read_a1==1400); + if (cnt_read==5) assert(it_struct->read_a1==1751); + cnt_read++; + /*int tmp=fid_stored >>4; + cout<<" fileid="<<tmp<<" read_a1="<<it_struct->read_a1<<endl;*/ + } + } + } + assert(cnt_read==6); +} + +void test_processPEFiles() { + char * fq_1_test=(char *) "data/test_PE1.fq"; + char * fq_2_test=(char *) "data/test_PE2.fq"; + +} + +void test_processAllFiles() { + +} + +int main(int argc, char **argv) { + /*char* fq_s_test="data/test_single.fq";*/ + test_processSingleFile(); + test_processPEFiles(); + test_processAllFiles(); /* mix PE together with single; nearly as in real life.*/ + + + /*void test_processPEFiles(fq_1_test,fq_2_test);*/ +}