From 8c0033d1cd0cea9d09c281b62fc8dedf80810349 Mon Sep 17 00:00:00 2001 From: Veronique Legrand <vlegrand@pasteur.fr> Date: Thu, 9 Dec 2021 11:39:03 +0100 Subject: [PATCH] fixed bug related to the handling of '+' char in scores; did some cleaning; updated version number --- configure | 211 ++++++++++++++++++------------------- configure.ac | 2 +- src/FqBaseBackend.h | 8 +- src/FqMainBackend.cpp | 1 + src/ROCKparams.cpp | 80 ++++++-------- src/unit_test_fqreader.cpp | 2 +- 6 files changed, 147 insertions(+), 157 deletions(-) diff --git a/configure b/configure index b2f7a53..6eb122b 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for rock 1.9.3. +# Generated by GNU Autoconf 2.69 for rock 1.9.5. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='rock' PACKAGE_TARNAME='rock' -PACKAGE_VERSION='1.9.3' -PACKAGE_STRING='rock 1.9.3' +PACKAGE_VERSION='1.9.5' +PACKAGE_STRING='rock 1.9.5' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -594,6 +594,7 @@ am__nodep AMDEPBACKSLASH AMDEP_FALSE AMDEP_TRUE +am__quote am__include DEPDIR OBJEXT @@ -679,8 +680,7 @@ PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR -SHELL -am__quote' +SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking @@ -1236,7 +1236,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures rock 1.9.3 to adapt to many kinds of systems. +\`configure' configures rock 1.9.5 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1307,7 +1307,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of rock 1.9.3:";; + short | recursive ) echo "Configuration of rock 1.9.5:";; esac cat <<\_ACEOF @@ -1397,7 +1397,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -rock configure 1.9.3 +rock configure 1.9.5 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1452,7 +1452,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by rock $as_me 1.9.3, which was +It was created by rock $as_me 1.9.5, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -1942,7 +1942,7 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- -am__api_version='1.16' +am__api_version='1.15' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or @@ -2428,7 +2428,7 @@ fi # Define the identity of the package. PACKAGE='rock' - VERSION='1.9.3' + VERSION='1.9.5' cat >>confdefs.h <<_ACEOF @@ -2458,8 +2458,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: -# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> -# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> mkdir_p='$(MKDIR_P)' # We need awk for the "check" target (and possibly the TAP driver). The @@ -2510,7 +2510,7 @@ END Aborting the configuration process, to ensure you take notice of the issue. 
You can download and install GNU coreutils to get an 'rm' implementation -that behaves properly: <https://www.gnu.org/software/coreutils/>. +that behaves properly: <http://www.gnu.org/software/coreutils/>. If you want to complete the configuration process using your problematic 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM @@ -3033,45 +3033,45 @@ DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 -$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } -cat > confinc.mk << 'END' + +am_make=${MAKE-make} +cat > confinc << 'END' am__doit: - @echo this is the am__doit target >confinc.out + @echo this is the am__doit target .PHONY: am__doit END +# If we don't find an include directive, just comment out the code. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 +$as_echo_n "checking for style of include used by $am_make... " >&6; } am__include="#" am__quote= -# BSD make does it like this. -echo '.include "confinc.mk" # ignored' > confmf.BSD -# Other make implementations (GNU, Solaris 10, AIX) do it like this. -echo 'include confinc.mk # ignored' > confmf.GNU -_am_result=no -for s in GNU BSD; do - { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 - (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - case $?:`cat confinc.out 2>/dev/null` in #( - '0:this is the am__doit target') : - case $s in #( - BSD) : - am__include='.include' am__quote='"' ;; #( - *) : - am__include='include' am__quote='' ;; -esac ;; #( - *) : - ;; +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. 
+case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; esac - if test "$am__include" != "#"; then - _am_result="yes ($s style)" - break - fi -done -rm -f confinc.* confmf.* -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 -$as_echo "${_am_result}" >&6; } +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 +$as_echo "$_am_result" >&6; } +rm -f confinc confmf # Check whether --enable-dependency-tracking was given. if test "${enable_dependency_tracking+set}" = set; then : @@ -3921,7 +3921,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by rock $as_me 1.9.3, which was +This file was extended by rock $as_me 1.9.5, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3978,7 +3978,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -rock config.status 1.9.3 +rock config.status 1.9.5 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" @@ -4086,7 +4086,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # # INIT-COMMANDS # -AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" _ACEOF @@ -4534,35 +4534,29 @@ $as_echo "$as_me: executing $ac_file commands" >&6;} # Older Autoconf quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. 
- # TODO: see whether this extra hack can be removed once we start - # requiring Autoconf 2.70 or later. - case $CONFIG_FILES in #( - *\'*) : - eval set x "$CONFIG_FILES" ;; #( - *) : - set x $CONFIG_FILES ;; #( - *) : - ;; -esac + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac shift - # Used to flag and report bootstrapping failures. - am_rc=0 - for am_mf + for mf do # Strip MF so we end up with the name of the file. - am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` - # Check whether this is an Automake generated Makefile which includes - # dependency-tracking related rules and includes. - # Grep'ing the whole file directly is not great: AIX grep has a line + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. - sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ - || continue - am_dirpart=`$as_dirname -- "$am_mf" || -$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$am_mf" : 'X\(//\)[^/]' \| \ - X"$am_mf" : 'X\(//\)$' \| \ - X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$am_mf" | + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -4580,48 +4574,53 @@ $as_echo X"$am_mf" | q } s/.*/./; q'` - am_filepart=`$as_basename -- "$am_mf" || -$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ - X"$am_mf" : 'X\(//\)$' \| \ - X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$am_mf" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } - /^X\/\(\/\/\)$/{ + /^X\(\/\/\)[^/].*/{ s//\1/ q } - /^X\/\(\/\).*/{ + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` - { echo "$as_me:$LINENO: cd "$am_dirpart" \ - && sed -e '/# am--include-marker/d' "$am_filepart" \ - | $MAKE -f - am--depfiles" >&5 - (cd "$am_dirpart" \ - && sed -e '/# am--include-marker/d' "$am_filepart" \ - | $MAKE -f - am--depfiles) >&5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } || am_rc=$? 
+ as_dir=$dirpart/$fdir; as_fn_mkdir_p + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done done - if test $am_rc -ne 0; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "Something went wrong bootstrapping makefile fragments - for automatic dependency tracking. Try re-running configure with the - '--disable-dependency-tracking' option to at least be able to build - the package (albeit without support for automatic dependency tracking). -See \`config.log' for more details" "$LINENO" 5; } - fi - { am_dirpart=; unset am_dirpart;} - { am_filepart=; unset am_filepart;} - { am_mf=; unset am_mf;} - { am_rc=; unset am_rc;} - rm -f conftest-deps.mk } ;; diff --git a/configure.ac b/configure.ac index 4455482..fdf809f 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) -AC_INIT(rock, 1.9.3) +AC_INIT(rock, 1.9.5) AC_CANONICAL_SYSTEM diff --git a/src/FqBaseBackend.h b/src/FqBaseBackend.h index a91a8ac..5e73247 100644 --- a/src/FqBaseBackend.h +++ b/src/FqBaseBackend.h @@ -89,8 +89,8 @@ protected: // for writing undefined (reads that don't contain a sufficient number of correct k-mers) // correct k-mers are k-mers that contain only nucleotides with a quality score greater than a given threshold (default is 0). // expected minimum number of correct k-mers is provided by the user. 
Default is 1 - char * undef_filename; - int undef_f_desc; + //char * undef_filename; + //int undef_f_desc; static FasqQualThreshold qual_thres; @@ -121,8 +121,8 @@ public: f_id=0; o_f_desc=-1; o_filename=NULL; - undef_f_desc=-1; - undef_filename=NULL; + //undef_f_desc=-1; + // undef_filename=NULL; o_buf=NULL; pos_in_w_buf=NULL; diff --git a/src/FqMainBackend.cpp b/src/FqMainBackend.cpp index fe85cff..bb37e78 100644 --- a/src/FqMainBackend.cpp +++ b/src/FqMainBackend.cpp @@ -158,6 +158,7 @@ void FqMainBackend::processBuf(T_buf_info& buf_info,unsigned char f_id,unsigned } break; case k_read_qual_start: + if (qual_score) goto inc_score; // In this case we are already processing the read score. The + char is simply phred score 43-33=10 if phred33. qual_score=1; fq_rec_info.nb_k_mers_in_error=0; fq_rec_info.nb_k_mers_in_error_in_PE2=0; diff --git a/src/ROCKparams.cpp b/src/ROCKparams.cpp index 634642d..38a066f 100755 --- a/src/ROCKparams.cpp +++ b/src/ROCKparams.cpp @@ -37,51 +37,41 @@ const int ROCKparams::output_ext; const int ROCKparams::undef_ext; -/* -void ROCKparams::computeLambda() { - unsigned long tmp=parms.filter_size; - tmp*=1073741824; // this is in fact 1024*1024*1024. 
- parms.lambda=tmp/UINT_MAX; - if (parms.kappa>get_mask<unsigned char>::value) parms.lambda=parms.lambda/sizeof(unsigned short); -}*/ -/* -int ROCKparams::getfilterPEMode() { - return parms.filter_PE_separately; -}*/ + void usage(int status) { -cout<<"ROCK 1.9.3 Copyright (C) 2016-2021 Institut Pasteur"<<endl; -cout<<endl; -cout<<"Reducing Over-Covering K-mers within FASTQ file(s)"<<endl; -cout<<endl; -cout<<"USAGE: rock [options] [files]"<<endl; -cout<<endl; -cout<<"OPTIONS:"<<endl; -cout<<" -i <file> file containing the name(s) of the input FASTQ file(s) to"<<endl; -cout<<" process; single-end: one file name per line; paired-end:"<<endl; -cout<<" two file names per line separated by a comma; up to 15"<<endl; -cout<<" FASTQ file names can be specified; of note, input file"<<endl; -cout<<" name(s) can also be specified as program argument(s)"<<endl; -cout<<" -o <file> file containing the name(s) of the output FASTQ file(s);"<<endl; -cout<<" FASTQ file name(s) should be structured in the same way as"<<endl; -cout<<" the file specified in option -i."<<endl; -cout<<" -k <int> k-mer length (default 25)"<<endl; -cout<<" -c <int> lower-bound k-mer coverage depth threshold (default: 0)"<<endl; -cout<<" -C <int> upper-bound k-mer coverage depth threshold (default: 70)"<<endl; -cout<<" -l <int> number of hashing function(s) (default: 4)"<<endl; -cout<<" -n <int> expected total number of distinct k-mers within the input"<<endl; -cout<<" read sequences; not compatible with option -l."<<endl; -cout<<" -f <float> maximum expected false positive probability when computing"<<endl; -cout<<" the optimal number of hashing functions from the number of"<<endl; -cout<<" distinct k-mers specified with option -n (default: 0.05)."<<endl; -cout<<" -q <int> sets as valid only k-mers made up of nucleotides with"<<endl; -cout<<" Phred score (+33 offset) above this cutoff (default: 0)"<<endl; -cout<<" -m <int> minimum number of valid k-mer(s) to consider a read; all"<<endl; -cout<<" 
non-considered reads are written into output file(s) with"<<endl; -cout<<" extension undefined.fastq (default: 1)"<<endl; -cout<<" -v verbose mode"<<endl; -cout<<" -h prints this message and exit"<<endl; -exit(status); + cout<<endl; + cout<<"ROCK 1.9.5 Copyright (C) 2016-2021 Institut Pasteur"<<endl; + cout<<endl; + cout<<"Reducing Over-Covering K-mers within FASTQ file(s)"<<endl; + cout<<endl; + cout<<"USAGE: rock [options] [files]"<<endl; + cout<<endl; + cout<<"OPTIONS:"<<endl; + cout<<" -i <file> file containing the name(s) of the input FASTQ file(s) to"<<endl; + cout<<" process; single-end: one file name per line; paired-end:"<<endl; + cout<<" two file names per line separated by a comma; up to 15"<<endl; + cout<<" FASTQ file names can be specified; of note, input file"<<endl; + cout<<" name(s) can also be specified as program argument(s)"<<endl; + cout<<" -o <file> file containing the name(s) of the output FASTQ file(s);"<<endl; + cout<<" FASTQ file name(s) should be structured in the same way as"<<endl; + cout<<" the file specified in option -i."<<endl; + cout<<" -k <int> k-mer length (default 25)"<<endl; + cout<<" -c <int> lower-bound k-mer coverage depth threshold (default: 0)"<<endl; + cout<<" -C <int> upper-bound k-mer coverage depth threshold (default: 70)"<<endl; + cout<<" -l <int> number of hashing function(s) (default: 4)"<<endl; + cout<<" -n <int> expected total number of distinct k-mers within the input"<<endl; + cout<<" read sequences; not compatible with option -l."<<endl; + cout<<" -f <float> maximum expected false positive probability when computing"<<endl; + cout<<" the optimal number of hashing functions from the number of"<<endl; + cout<<" distinct k-mers specified with option -n (default: 0.05)."<<endl; + cout<<" -q <int> sets as valid only k-mers made up of nucleotides with"<<endl; + cout<<" Phred score (+33 offset) above this cutoff (default: 0)"<<endl; + cout<<" -m <int> minimum number of valid k-mer(s) to consider a read;"<<endl; + 
cout<<" -v verbose mode"<<endl; + cout<<" -h prints this message and exit"<<endl; + cout<<endl; + exit(status); } @@ -223,10 +213,10 @@ void ROCKparams::changeExtension(string& FName,const int& extension_type) { // c if (o_found!=std::string::npos) FName.replace(o_found,1,".rock."); else FName.append(".rock.fq"); } - else { +/* else { if (o_found!=std::string::npos) FName.replace(o_found,1,".undefined."); else FName.append(".undefined.fq"); - } + }*/ } string ROCKparams::genUndefFilename(const string& fname,const string& dname) { diff --git a/src/unit_test_fqreader.cpp b/src/unit_test_fqreader.cpp index 22e1104..bb72773 100644 --- a/src/unit_test_fqreader.cpp +++ b/src/unit_test_fqreader.cpp @@ -89,7 +89,7 @@ void test_processSingleFile() { } /* - * Expects a given minimum of k_mers to be present in the read or else it is dumped as undefined + * Expects a given minimum of k_mers to be present in the read. */ void test_processSingleFileWithMQOption() { srp sr; -- GitLab