Skip to content
Snippets Groups Projects
Commit 94e47eec authored by Alexis CRISCUOLO
Browse files

4.0

parent 8f289ad4
No related branches found
No related tags found
No related merge requests found
......@@ -27,33 +27,34 @@ Execute _wgetENAHTS_ with the following command line model:
Run _wgetENAHTS_ without any option to display the following documentation:
```
USAGE: wgetENAHTS.sh [[-o <dir>] [-f <infile>]
[-t <nthreads>] [-r <rate>] [-n] [-h]] [<accn> ...]
USAGE: wgetENAHTS.sh [[-o <dir>] [-f <infile>] [-t <nthreads>]
[-p <protocol>] [-r <rate>] [-n] [-h]] [<accn> ...]
Downloads FASTQ files corresponding to the specified DRR/ERR/SRR accession(s)
Files are downloaded from the ENA ftp repository ftp.sra.ebi.ac.uk/vol1/fastq
OPTIONS:
-o <dir> output directory (default: .)
-f <file> to read accession(s) from the specified file (default: all the last
arguments)
-t <int> number of thread(s) (default: 2)
-r <int> maximum download rate per file (in kb per seconds; default: entire
available bandwidth)
-n no file download, only check (default: not set)
-h prints this help and exits
-o <dir> output directory (default: .)
-f <file> to read accession(s) from the specified file (default: all the last
arguments)
-t <int> number of thread(s) (default: 2)
-p <string> force the transfer protocol, either ftp or https (default: auto)
-r <int> maximum download rate per file, in kb per seconds (default: entire
available bandwidth)
-n no file download, only check (default: not set)
-h prints this help and exits
EXAMPLES:
+ downloading the SE FASTQ file corresponding to accession DRR000003:
wgetENAHTS.sh DRR000003
+ downloading the FASTQ files corresponding to accessions ERR000001 and ERR000004:
+ downloading FASTQ files corresponding to accessions ERR000001 and ERR000004:
wgetENAHTS.sh ERR000001 ERR000004
+ assessing the repository existence for accessions SRR9870010-39:
wgetENAHTS.sh -n SRR98700{10..39}
+ downloading the FASTQ files (if any) corresponding to accessions SRR9870010-39:
+ downloading FASTQ files (if any) corresponding to accessions SRR9870010-39:
wgetENAHTS.sh SRR98700{10..39}
+ same as above with (at most) 6 parallel downloads and saved outputs:
......
......@@ -105,21 +105,22 @@ mandoc() {
echo -e "\n\033[1m wgetENAHTS v$VERSION $COPYRIGHT\033[0m";
cat <<EOF
USAGE: wgetENAHTS.sh [[-o <dir>] [-f <infile>]
[-t <nthreads>] [-r <rate>] [-n] [-h]] [<accn> ...]
USAGE: wgetENAHTS.sh [[-o <dir>] [-f <infile>] [-t <nthreads>]
[-p <protocol>] [-r <rate>] [-n] [-h]] [<accn> ...]
Downloads FASTQ files corresponding to the specified DRR/ERR/SRR accession(s)
Files are downloaded from the ENA ftp repository ftp.sra.ebi.ac.uk/vol1/fastq
OPTIONS:
-o <dir> output directory (default: .)
-f <file> to read accession(s) from the specified file (default: all the last
arguments)
-t <int> number of thread(s) (default: 2)
-r <int> maximum download rate per file (in kb per seconds; default: entire
available bandwidth)
-n no file download, only check (default: not set)
-h prints this help and exits
-o <dir> output directory (default: .)
-f <file> to read accession(s) from the specified file (default: all the last
arguments)
-t <int> number of thread(s) (default: 2)
-p <string> force the transfer protocol, either ftp or https (default: auto)
-r <int> maximum download rate per file, in kb per seconds (default: entire
available bandwidth)
-n no file download, only check (default: not set)
-h prints this help and exits
EXAMPLES:
+ downloading the SE FASTQ file corresponding to accession DRR000003:
......@@ -158,7 +159,7 @@ EOF
# #
WGET_BIN=wget;
[ ! $(command -v $WGET_BIN) ] && echoxit "no $WGET_BIN detected" ;
WGET_STATIC_OPTIONS="--no-check-certificate --retry-connrefused --random-wait --quiet";
WGET_STATIC_OPTIONS="--quiet --retry-connrefused --no-check-certificate";
WGET="$WGET_BIN $WGET_STATIC_OPTIONS";
# #
# -- gzip ------------------------------------------------------------------------------------------------- #
......@@ -180,28 +181,28 @@ EOF
if [ $# -lt 1 ]; then mandoc ; exit 1 ; fi
FILE_REPORT="https://www.ebi.ac.uk/ena/portal/api/filereport?download=true&result=read_run&accession=";
WGET_DWNL="$WGET --continue --no-directories";
WGET_READ="$WGET --output-document -";
WGET_DWNL="$WGET --read-timeout=200 --continue --no-directories";
WGET_READ="$WGET --read-timeout=200 --output-document -";
WGET_TEST="$WGET --spider";
NTHREADS=2;
OUTDIR=".";
INFILE="_N.A_";
MAXRATE="NA";
WAITIME=0;
PROTOCOL="auto";
DWNL=true;
while getopts t:o:f:r:w:nh option
while getopts t:o:f:r:p:nh option
do
case $option in
t) NTHREADS=$OPTARG ;;
o) OUTDIR="$OPTARG" ;;
f) INFILE="$OPTARG" ;;
r) MAXRATE=$OPTARG ;;
w) WAITIME=$OPTARG ;;
n) DWNL=false ;;
h) mandoc ; exit 0 ;;
\?) mandoc ; exit 1 ;;
t) NTHREADS=$OPTARG ;;
o) OUTDIR="$OPTARG" ;;
f) INFILE="$OPTARG" ;;
r) MAXRATE=$OPTARG ;;
p) PROTOCOL="$OPTARG" ;;
n) DWNL=false ;;
h) mandoc ; exit 0 ;;
\?) mandoc ; exit 1 ;;
esac
done
shift "$(( $OPTIND - 1 ))"
......@@ -233,15 +234,16 @@ then
[ $MAXRATE -lt 1 ] && MAXRATE=1;
WGET_DWNL="$WGET_DWNL --limit-rate=$MAXRATE"k;
fi
[[ $NTHREADS =~ ^[0-9]+$ ]] || echoxit "incorrect value (option -t): $NTHREADS" ;
[ $NTHREADS -lt 1 ] && NTHREADS=1;
[[ $WAITIME =~ ^[0-9]+$ ]] || echoxit "incorrect value (option -w): $WAITIME" ;
if [ $WAITIME -eq 0 ]
if [ "$PROTOCOL" != "auto" ] && [ "$PROTOCOL" != "ftp" ] && [ "$PROTOCOL" != "https" ]
then
wt=0; while [ $(( $wt * $wt )) -lt $NTHREADS ]; do let wt++ ; done
WAITIME=$wt;
echoxit "transfer protocol should be either ftp or https (option -p): $PROTOCOL" ;
fi
[ $WAITIME -lt 1 ] && WAITIME=1;
[[ $NTHREADS =~ ^[0-9]+$ ]] || echoxit "incorrect value (option -t): $NTHREADS" ;
[ $NTHREADS -lt 1 ] && NTHREADS=1;
wt=0; while [ $(( $wt * $wt )) -lt $NTHREADS ]; do let wt++ ; done
WAITIME=$wt;
[ $WAITIME -lt 1 ] && WAITIME=1;
##############################################################################################################
......@@ -266,24 +268,28 @@ echo ;
##############################################################################################################
#### ####
#### CHECKING PROTOCOL ####
#### ASSESSING TRANSFER PROTOCOL ####
#### ####
##############################################################################################################
URL="ftp.sra.ebi.ac.uk/vol1/fastq";
echo -n -e "$(chrono)\t\tchecking protocol " ;
time_ftp=$SECONDS;
for i in {1..5} ; do echo -n "." ; timeout 2 $WGET_TEST "ftp://$URL/DRR00$i/" &>/dev/null ; rm -f wget-log ; done
time_ftp=$(( $SECONDS - $time_ftp ));
time_https=$SECONDS;
for i in {1..5} ; do echo -n "." ; timeout 2 $WGET_TEST "https://$URL/DRR00$i/" &>/dev/null ; rm -f wget-log ; done
time_https=$(( $SECONDS - $time_https ));
echo " [ok]" ;
if [ $time_ftp -lt $time_https ]
then FTPENA="ftp://$URL"; echo -e "$(chrono)\t\tselected protocol: ftp ($time_ftp:$time_https)" ;
else FTPENA="https://$URL"; echo -e "$(chrono)\t\tselected protocol: https ($time_https:$time_ftp)" ;
if [ "$PROTOCOL" == "auto" ]
then
echo -n -e "$(chrono)\t\tassessing transfer protocol " ;
PROTOCOL="ftp://"; time_ftp=$SECONDS;
for i in {1..5} ; do echo -n "." ; timeout 2 $WGET_TEST "$PROTOCOL$URL/DRR00$i/" &>/dev/null ; rm -f wget-log ; done
time_ftp=$(( $SECONDS - $time_ftp ));
PROTOCOL="https://"; time_https=$SECONDS;
for i in {1..5} ; do echo -n "." ; timeout 2 $WGET_TEST "$PROTOCOL$URL/DRR00$i/" &>/dev/null ; rm -f wget-log ; done
time_https=$(( $SECONDS - $time_https ));
echo " [ok]" ;
if [ $time_ftp -lt $time_https ]
then PROTOCOL="ftp://"; echo -e "$(chrono)\t\tselected protocol: ftp ($time_ftp<$time_https)" ;
else PROTOCOL="https://"; echo -e "$(chrono)\t\tselected protocol: https ($time_https<$time_ftp)" ;
fi
echo ;
fi
echo ;
FTPENA="$PROTOCOL$URL" ;
##############################################################################################################
......@@ -303,7 +309,7 @@ do
if $DWNL && ls $OUTFQ &>/dev/null
then
echo -e "[$C/$NA]\t\t\033[34mfile(s) already exist(s) for accession $ACCN\033[0m" >&2 ;
stat -c "%s %n" $OUTFQ | while read s n ; do echo -e "\033[90m$n\t[$(fb $s)]\033[0m" >&2 ; done
stat -c "%s %n" $OUTFQ | while read s n ; do echo -e "\t\t\033[90m$n\t[$(fb $s)]\033[0m" >&2 ; done
continue ;
fi
nc=${#ACCN};
......@@ -353,14 +359,6 @@ echo ;
if [ $DL -eq 0 ]; then exit 0 ;
elif [ $DL -eq 1 ]; then echo -e -n "$(chrono)\t\t$DL valid accession; " ;
else echo -e -n "$(chrono)\t\t$DL valid accessions; " ; fi
##############################################################################################################
#### ####
#### DOWNLOADING FASTQ FILES ####
#### ####
##############################################################################################################
N=0;
for ACCN in $ACCNLIST
do
......@@ -368,13 +366,67 @@ do
nf=$(grep -c -F ".fastq.gz" $OUTDIR/$ACCN.weh);
N=$(( $N + $nf ));
done
if [ $N -eq 1 ]; then echo "$N file to download" ;
else echo "$N files to download" ; fi
echo ;
##############################################################################################################
#### ####
#### SORTING ACCESSIONS ####
#### ####
##############################################################################################################
ACCNSORT="$(for ACCN in $ACCNLIST ; do [ -s $OUTDIR/$ACCN.weh ] && echo -e "$(sed 's/ /\t/g' $OUTDIR/$ACCN.weh | cut -f2 | paste -sd+ | bc -l)\t$ACCN" ; done | sort -gr | cut -f2 | tr '\n' ' ')";
##############################################################################################################
#### ####
#### MEASURING DOWNLOAD SPEED ####
#### ####
##############################################################################################################
# if [ "$MAXRATE" == "NA" ]
# then
# echo -e -n "$(chrono)\t\testimating download speed " ;
# for ACCN in $ACCNSORT
# do
# echo -n "." ;
# if [ ! -s $OUTDIR/$ACCN.weh ]; then continue ; fi
# nc=${#ACCN};
# if [ $nc -eq 9 ]; then URL="$FTPENA/${ACCN:0:6}/$ACCN/";
# elif [ $nc -eq 10 ]; then URL="$FTPENA/${ACCN:0:6}/00${ACCN:9:1}/$ACCN/";
# else URL="$FTPENA/${ACCN:0:6}/0${ACCN:9:2}/$ACCN/";
# fi
# echo -n "." ;
# for FQGZ in $(sed "s/.*$ACCN/$ACCN/g" $OUTDIR/$ACCN.weh)
# do
# echo -n "." ;
# timeout 30 $WGET_DWNL $URL$FQGZ &>/dev/null ;
# bytes=$(du -b $OUTDIR/$FQGZ | tail -1 | cut -f1);
# RATE=$(( $bytes / 30 ));
# rm -f wget-log ;
# break ;
# done
# break ;
# done
# echo " [ok]" ;
# if [ $N -lt $NTHREADS ]; then RATE=$(( $RATE / $N )); else RATE=$(( $RATE / $NTHREADS )); fi
# if [ $RATE -gt 10000 ]
# then
# WGET_DWNL="$WGET_DWNL --limit-rate=$RATE"
# echo -e "$(chrono)\t\tdownload speed per file: $RATE bytes/seconds" ;
# fi
# echo ;
# fi
##############################################################################################################
#### ####
#### DOWNLOADING FASTQ FILES ####
#### ####
##############################################################################################################
C=0;
for ACCN in $ACCNSORT
do
......@@ -388,7 +440,7 @@ do
do
let C++ ;
CMD1="echo -e \"[$C/$N]$'\t'$'\t'downloading$'\t'$FQGZ\" ; sleep $WAITIME";
CMD2="while true ; do $WGET_DWNL $URL$FQGZ && break ; done";
CMD2="while true ; do nice $WGET_DWNL $URL$FQGZ && break ; done";
CMD3="md5sum -b $OUTDIR/$FQGZ > $OUTDIR/$FQGZ.weh";
CMD4="s=\$(fb \$(stat -c %s $OUTDIR/$FQGZ))";
CMD5="echo \"[$C/$N]$'\t'$'\t'completed$'\t'$FQGZ$'\t'[\$s]\"";
......@@ -435,7 +487,7 @@ do
# rm -f $OUTDIR/$FQGZ ;
echo -e "\033[31m[WARNING]\t\tproblem with file $FQGZ\033[0m" >&2 ;
CMD1="echo \"[$C/$N]$'\t'$'\t'downloading$'\t'$FQGZ\" ; sleep $WAITIME";
CMD2="while true ; do $WGET_DWNL $URL$FQGZ && break ; done";
CMD2="while true ; do nice $WGET_DWNL $URL$FQGZ && break ; done";
CMD3="md5sum -b $OUTDIR/$FQGZ > $OUTDIR/$FQGZ.weh";
CMD4="s=\$(fb \$(stat -c %s $OUTDIR/$FQGZ))";
CMD5="echo \"[$C/$N]$'\t'$'\t'completed$'\t'$FQGZ$'\t'[\$s]\"";
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment