diff --git a/README.md b/README.md index 2a5ab34f14257d3e64dffca737c7fa4cb582fa16..72e1df6b6cd2be6303d52fc48c8623488bba0859 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ You will need to install the required programs listed in the following table, or | [_gzip_](https://www.gnu.org/software/gzip/) | - | > 1.5.0 | [ftp.gnu.org/gnu/gzip](https://ftp.gnu.org/gnu/gzip/) | | [_AlienDiscover_](https://gitlab.pasteur.fr/GIPhy/AlienDiscover) | - | ≥ 0.1 | [gitlab.pasteur.fr/GIPhy/AlienDiscover](https://gitlab.pasteur.fr/GIPhy/AlienDiscover) | | [_AlienRemover_](https://gitlab.pasteur.fr/GIPhy/AlienRemover) | - | ≥ 1.0 | [gitlab.pasteur.fr/GIPhy/AlienRemover](https://gitlab.pasteur.fr/GIPhy/AlienRemover) | -| [_AlienTrimmer_](https://research.pasteur.fr/en/software/alientrimmer/) | - | > 2.0 | [gitlab.pasteur.fr/GIPhy/AlienTrimmer](https://gitlab.pasteur.fr/GIPhy/AlienTrimmer) | +| [_AlienTrimmer_](https://research.pasteur.fr/en/software/alientrimmer/) | - | ≥ 2.1 | [gitlab.pasteur.fr/GIPhy/AlienTrimmer](https://gitlab.pasteur.fr/GIPhy/AlienTrimmer) | | [_FLASh_](https://ccb.jhu.edu/software/FLASH/) | - | > 1.2.10 | [sourceforge.net/projects/flashpage](https://sourceforge.net/projects/flashpage/) | | _fqconvert_ <br> _fqduplicate_ <br> _fqextract_ <br> _fqstats_ | [fqtools](http://ftp.pasteur.fr/pub/gensoft/projects/fqtools/) | ≥ 1.1a | [ftp.pasteur.fr/pub/gensoft/projects/fqtools](http://ftp.pasteur.fr/pub/gensoft/projects/fqtools/) | | [_Musket_](http://musket.sourceforge.net/homepage.htm)<sup> ✦</sup> | - | ≥ 1.1 | [sourceforge.net/projects/musket](https://sourceforge.net/projects/musket/) | @@ -49,10 +49,10 @@ You will need to install the required programs listed in the following table, or </div> -<sup> ★</sup> For some Mac OS X, it is worth noting that the default [BSD _xargs_](https://www.freebsd.org/cgi/man.cgi?xargs) does not offer all the functionalities required by _fqCleanER_. 
+<sup> ★</sup> <span style="font-size:0.9em;">On some Mac OS X systems, it is worth noting that the default [BSD _xargs_](https://www.freebsd.org/cgi/man.cgi?xargs) does not offer all the functionalities required by _fqCleanER_. 
+In such a case, edit the file `fqCleanER.sh` and indicate the local path to the corresponding binary(ies) within the code block `REQUIREMENTS` (approximately lines 80-220). For each required program, the table below reports the corresponding variable assignment instruction to edit (if needed) within the code block `REQUIREMENTS` <div align="center"> @@ -186,17 +186,17 @@ Run _fqCleanER_ without option to read the following documentation: * The cleaning/enhancing steps can be specified using option `-s` in any order. The same step can be specified several times (e.g. `-s DTDNEN`). - **[C]** Contaminating HTS read removal (`-s C`) is performed using [_AlienRemover_](https://gitlab.pasteur.fr/GIPhy/AlienRemover) with default options. Contaminating sequences/_k_-mers are specified using option `-A`. When contaminating sequences are quite short (e.g. virus genomes), they can be directly specified via a FASTA-formatted file without affecting the overall running time. However, to consider large contaminating sequences (e.g. human genomes), it is highly recommended to precompute the corresponding _k_-mer set using [_AlienRemover_](https://gitlab.pasteur.fr/GIPhy/AlienRemover) and specify the corresponding _k_-mer file (kmr/kmz file extension) to observe fast running times. If the option `-A` is not set, calling step C enables to remove [phi-X174](https://www.ncbi.nlm.nih.gov/nucleotide/NC_001422.1) HTS reads. + <span style="color:navy; font-size:1.1em;">**[C]**</span> Contaminating HTS read removal (`-s C`) is performed using [_AlienRemover_](https://gitlab.pasteur.fr/GIPhy/AlienRemover) with default options. Contaminating sequences/_k_-mers are specified using option `-A`. When contaminating sequences are quite short (e.g. virus genomes), they can be directly specified via a FASTA-formatted file without affecting the overall running time. However, to consider large contaminating sequences (e.g. 
human genomes), it is highly recommended to precompute the corresponding _k_-mer set using [_AlienRemover_](https://gitlab.pasteur.fr/GIPhy/AlienRemover) and specify the corresponding _k_-mer file (kmr/kmz file extension) to observe fast running times. If the option `-A` is not set, calling step C removes [phi-X174](https://www.ncbi.nlm.nih.gov/nucleotide/NC_001422.1) HTS reads. - **[D]** HTS read deduplication (`-s D`) is performed using [_fqduplicate_](http://ftp.pasteur.fr/pub/gensoft/projects/fqtools/). Note that a pair of PE reads (R1,R2) and (R1',R2') are considered as duplicated (i.e. identical) when R1 = R1' and R2 = R2'. + <span style="color:navy; font-size:1.1em;">**[D]**</span> HTS read deduplication (`-s D`) is performed using [_fqduplicate_](http://ftp.pasteur.fr/pub/gensoft/projects/fqtools/). Note that two pairs of PE reads (R1,R2) and (R1',R2') are considered as duplicated (i.e. identical) when R1 = R1' and R2 = R2'. - **[E]** Sequencing error correction (`-s E`) is performed using [_Musket_](http://musket.sourceforge.net/homepage.htm) (Liu et al. 2013) with _k_-mer length _k_ = 21. This step generally requires quite important running times and will benefit from a large number of threads (option `-t`). + <span style="color:navy; font-size:1.1em;">**[E]**</span> Sequencing error correction (`-s E`) is performed using [_Musket_](http://musket.sourceforge.net/homepage.htm) (Liu et al. 2013) with _k_-mer length _k_ = 21. This step generally requires quite long running times and will benefit from a large number of threads (option `-t`). - **[L][N][R]** These three steps (`-s L`, `-s N`, `-s R`, respectively) are related to the digital normalization procedure (Brown et al. 2012), performed using [_ROCK_](https://research.pasteur.fr/en/software/rock) (Legrand et al. 2022a, 2022b) with _k_-mer length _k_ = 25. 
Given a lower-bound and a upper-bound coverage depth thresholds (options `-c` and `-C`, respectively), the digital normalization selects a subset of HTS reads such that every sequenced base has a coverage depth between these two bounds. When setting a moderate upper-bound (that is lower than the overall average coverage depth; default: `-C 90`), every sequenced base from the selected HTS read subset is expected to have a coverage depth close to this bound. When setting a small lower-bound (default: `-c 4`), all HTS reads corresponding to a sequenced region with coverage depth lower than this bound will be discarded (e.g. artefactual or erroneous HTS read, low-coverage contaminating HTS read). Step N (`-s N`) uses the two bounds (options `-C` and `-c`), whereas steps L/R (`-s L` and `-s R`, respectively) use only the lower-/upper-bound, respectively. + <span style="color:navy; font-size:1.1em;">**[L][N][R]**</span> These three steps (`-s L`, `-s N`, `-s R`, respectively) are related to the digital normalization procedure (Brown et al. 2012), performed using [_ROCK_](https://research.pasteur.fr/en/software/rock) (Legrand et al. 2022a, 2022b) with _k_-mer length _k_ = 25. Given a lower-bound and an upper-bound coverage depth threshold (options `-c` and `-C`, respectively), the digital normalization selects a subset of HTS reads such that every sequenced base has a coverage depth between these two bounds. When setting a moderate upper-bound (that is lower than the overall average coverage depth; default: `-C 90`), every sequenced base from the selected HTS read subset is expected to have a coverage depth close to this bound. When setting a small lower-bound (default: `-c 4`), all HTS reads corresponding to a sequenced region with coverage depth lower than this bound will be discarded (e.g. artefactual or erroneous HTS reads, low-coverage contaminating HTS reads). 
Step N (`-s N`) uses the two bounds (options `-C` and `-c`), whereas steps L/R (`-s L` and `-s R`, respectively) use only the lower-/upper-bound, respectively. - **[M]** PE HTS read merging (`-s M`, only with PE input files) is performed using [_FLASh_](https://ccb.jhu.edu/software/FLASH/) (Magoc and Salzberg 2011) when the insert size is shorter than the sum of the two paired HTS read lengths. When using this step, dedicated output files are written ( _.M.fastq_ file extension). + <span style="color:navy; font-size:1.1em;">**[M]**</span> PE HTS read merging (`-s M`, only with PE input files) is performed using [_FLASh_](https://ccb.jhu.edu/software/FLASH/) (Magoc and Salzberg 2011) when the insert size is shorter than the sum of the two paired HTS read lengths. When using this step, dedicated output files are written ( _.M.fastq_ file extension). - **[T]** Trimming and clipping (`-s T`) are performed using [_AlienTrimmer_](https://research.pasteur.fr/en/software/alientrimmer/) (Criscuolo and Brisse 2013). Clipping is carried out based on the specified alien oligonucleotides (option `-a`), where alien oligonucleotide sequences can be (i) set using precomputed standard library names, (ii) specified via user-defined FASTA-formatted file, or (iii) directly estimated from the input files using [_AlienDiscover_](https://gitlab.pasteur.fr/GIPhy/AlienDiscover) (option `-a AUTO`). When step T is run without setting option `-a`, clipping is carried out with the four homopolymers (`POLY`) as alien oligonucleotides. Trimming is carried out by deleting 5' and 3' regions containing many non-confident bases, where a base is considered as non-confident when its Phred score is lower than a Phred score threshold (set using option `-q`; default: 15). 
After trimming/clipping an HTS read, it can be discarded when the number of remaining bases is lower than a specified length threshold (option `-l`; default: half the average read length) or when the percentage of remaining non-confident bases is higher than another specified threshold (option `-p`; default: 50%). Note that when HTS read discarding breaks PE, singletons are written into dedicated output files ( _.S.fastq_ file extension). + <span style="color:navy; font-size:1.1em;">**[T]**</span> Trimming and clipping (`-s T`) are performed using [_AlienTrimmer_](https://research.pasteur.fr/en/software/alientrimmer/) (Criscuolo and Brisse 2013). Clipping is carried out based on the specified alien oligonucleotides (option `-a`), where alien oligonucleotide sequences can be (i) set using precomputed standard library names, (ii) specified via user-defined FASTA-formatted file, or (iii) directly estimated from the input files using [_AlienDiscover_](https://gitlab.pasteur.fr/GIPhy/AlienDiscover) (option `-a AUTO`). When step T is run without setting option `-a`, clipping is carried out with the four homopolymers (`POLY`) as alien oligonucleotides. Trimming is carried out by deleting 5' and 3' regions containing many non-confident bases, where a base is considered as non-confident when its Phred score is lower than a Phred score threshold (set using option `-q`; default: 15). After trimming/clipping an HTS read, it can be discarded when the number of remaining bases is lower than a specified length threshold (option `-l`; default: half the average read length) or when the percentage of remaining non-confident bases is higher than another specified threshold (option `-p`; default: 50%). Note that when HTS read discarding breaks PE, singletons are written into dedicated output files ( _.S.fastq_ file extension). * Each predefined set of alien oligonucleotide sequences can be displayed using option `-d`. 
Some sets of alien oligonucleotide sequences are derived from _'Illumina Adapter Sequences'_ [Document # 1000000002694 v16](https://emea.support.illumina.com/downloads/illumina-adapter-sequences-document-1000000002694.html), i.e. options `-a NEXTERA` (_Nextera DNA Indexes_), `-a IUDI` (_IDT for Illumina UD Indexes_), `-a AMPLISEQ` (_AmpliSeq for Illumina Panels_), `-a TRUSIGHT_PANCANCER` (_TruSight RNA Pan-Cancer Panel_), `-a TRUSEQ_UD` (_IDT for Illumina-TruSeq DNA and RNA UD Indexes_), `-a TRUSEQ_CD` (_TruSeq DNA and RNA CD Indexes_), `-a TRUSEQ_SINGLE` (_TruSeq Single Indexes_), and `-a TRUSEQ_SMALLRNA` (_TruSeq Small RNA_). <br> <sup><sub>**[Oligonucleotide sequences © 2021 Illumina, Inc. All rights reserved. Derivative works created by Illumina customers are authorized for use with Illumina instruments and products only. All other uses are strictly prohibited.]**</sub></sup> diff --git a/fqCleanER.sh b/fqCleanER.sh index 7cc8de72b9c1bc7e4dfaed0c4166e8070a5f792c..d0b1b363769932ebe03fd0ca25945f18dd9fa95e 100755 --- a/fqCleanER.sh +++ b/fqCleanER.sh @@ -31,7 +31,10 @@ # = VERSIONS = # # ============ # # # - VERSION=23.07 # + VERSION=23.12 # +# + fixed bug in fqinit() when dealing with Phred lines containing only '?' (e.g. SRR26898806) # +# # +# VERSION=23.07 # # + replacing MINION with ALIENDISCOVER (-a AUTO) # # + able to deal with GXARGS on OS X # # + mvf() now considers empty infiles # @@ -90,7 +93,7 @@ # ================ # # # # - SUMMARY - -# gawk/5.0.1 bzip2/1.0.6 dsrc/2.0.2 AlienDiscover/0.1 AlienRemover/1.0 AlienTrimmer/2.0 FLASH/1.2.11 fqtools/1.2 musket/1.1 ntCard/1.2.2 ROCK/2.0.0 +# AlienDiscover/0.1 AlienRemover/1.0 AlienTrimmer/2.1 bzip2/1.0.6 dsrc/2.0.2 fqtools/1.2 FLASH/1.2.11 gawk/5.0.1 musket/1.1 ntCard/1.2.2 ROCK/2.0.0 # # # -- gawk ------------------------------------------------------------------------------------------------- # # # @@ -173,14 +176,17 @@ if [ ! 
$(command -v $FQCONVERT_BIN) ]; then echo "$FQCONVERT_BIN not found" >&2 ; exit 1 ; fi FQCONVERT_STATIC_OPTIONS="-n 100000 "; FQCONVERT="$FQCONVERT_BIN $FQCONVERT_STATIC_OPTIONS"; + FQDUPLICATE_BIN=fqduplicate; if [ ! $(command -v $FQDUPLICATE_BIN) ]; then echo "$FQDUPLICATE_BIN not found" >&2 ; exit 1 ; fi FQDUPLICATE_STATIC_OPTIONS=""; FQDUPLICATE="$FQDUPLICATE_BIN $FQDUPLICATE_STATIC_OPTIONS"; + FQEXTRACT_BIN=fqextract; if [ ! $(command -v $FQEXTRACT_BIN) ]; then echo "$FQEXTRACT_BIN not found" >&2 ; exit 1 ; fi FQEXTRACT_STATIC_OPTIONS=""; FQEXTRACT="$FQEXTRACT_BIN $FQEXTRACT_STATIC_OPTIONS"; + FQSTATS_BIN=fqstats; if [ ! $(command -v $FQSTATS_BIN) ]; then echo "$FQSTATS_BIN not found" >&2 ; exit 1 ; fi FQSTATS_STATIC_OPTIONS=""; @@ -3067,18 +3073,18 @@ fqinit() { ## PE lib 1 if [ "$I11,$I12" != "$NA2" ] then - case ${I11##*.} in - fastq|fq) { tr -d '\r' < $I11 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I11 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I11 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I11 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf11 ; } 2>/dev/null ;; + case ${I11##*.} in # [- header cut at 1st blank, /1 or /2 always appended -] [--------- rewriting each run of 37 "?" in Phred lines ---------] + fastq|fq) { tr -d '\r' < $I11 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I11 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; 
+ bz|bz2) { $BUNZIP2 -c $I11 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf11 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I11 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf11 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge $4 ]; do sleep $WAITIME ; done case ${I12##*.} in - fastq|fq) { tr -d '\r' < $I12 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I12 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I12 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I12 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf12 ; } 2>/dev/null ;; + fastq|fq) { tr -d '\r' < $I12 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I12 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; + bz|bz2) { $BUNZIP2 -c $I12 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf12 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I12 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf12 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge 
$4 ]; do sleep $WAITIME ; done else @@ -3088,17 +3094,17 @@ fqinit() { if [ "$I21,$I22" != "$NA2" ] then case ${I21##*.} in - fastq|fq) { tr -d '\r' < $I21 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I21 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I21 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I21 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf21 ; } 2>/dev/null ;; + fastq|fq) { tr -d '\r' < $I21 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I21 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; + bz|bz2) { $BUNZIP2 -c $I21 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf21 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I21 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf21 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge $4 ]; do sleep $WAITIME ; done case ${I22##*.} in - fastq|fq) { tr -d '\r' < $I22 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I22 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I22 | 
paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I22 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf22 ; } 2>/dev/null ;; + fastq|fq) { tr -d '\r' < $I22 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I22 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; + bz|bz2) { $BUNZIP2 -c $I22 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf22 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I22 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf22 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge $4 ]; do sleep $WAITIME ; done else @@ -3108,17 +3114,17 @@ fqinit() { if [ "$I31,$I32" != "$NA2" ] then case ${I31##*.} in - fastq|fq) { tr -d '\r' < $I31 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I31 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I31 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I31 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/1";sub("/1/1$","/1",h);print h"\n"$2"\n+\n"$4}' > $tf31 ; } 2>/dev/null ;; + fastq|fq) { tr -d '\r' < $I31 | paste - - - - | $TAWK '{sub("( 
.*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I31 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; + bz|bz2) { $BUNZIP2 -c $I31 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf31 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I31 | paste - - - - | $TAWK '{sub("( .*)?$","/1",$1);sub("/1/1$","/1",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf31 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge $4 ]; do sleep $WAITIME ; done case ${I32##*.} in - fastq|fq) { tr -d '\r' < $I32 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; - gz) { $GUNZIP -c $I32 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; - bz|bz2) { $BUNZIP2 -c $I32 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; - dsrc|dsrc2) { $DSRC2 d -t$4 -s $I32 | paste - - - - | $TAWK '{sub(" .*","",$1);h=$1"/2";sub("/2/2$","/2",h);print h"\n"$2"\n+\n"$4}' > $tf32 ; } 2>/dev/null ;; + fastq|fq) { tr -d '\r' < $I32 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; + gz) { $GUNZIP -c $I32 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; + bz|bz2) { $BUNZIP2 -c $I32 | paste - - - - | $TAWK '{sub("( 
.*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf32 & } 2>/dev/null ;; + dsrc|dsrc2) { $DSRC2 d -t$4 -s $I32 | paste - - - - | $TAWK '{sub("( .*)?$","/2",$1);sub("/2/2$","/2",$1);gsub("\\?{37}","789:;<=>?@ABCDEFGHIHGFEDCBA@?>=<;:987",$4);print $1"\n"$2"\n+\n"$4}' > $tf32 ; } 2>/dev/null ;; esac while [ $(jobs -r | wc -l) -ge $4 ]; do sleep $WAITIME ; done else