diff --git a/wgetGenBankWGS.sh b/wgetGenBankWGS.sh
index 335fa9a00eba3016a2d5cbbbac679863440efd11..cb1580cafcdec176884ed5741cdc75ddef62cd6f 100755
--- a/wgetGenBankWGS.sh
+++ b/wgetGenBankWGS.sh
@@ -33,7 +33,11 @@
 # = VERSIONS = #
 # ============ #
 # #
- VERSION=0.2.190228ac #
+ VERSION=0.3.190613ac #
+# + no test between ftp and http protocols; use directly http #
+# + fixed bug when the specified pattern has no match #
+# #
+# VERSION=0.2.190228ac #
 # + option -d for downloading from either genbank or refseq #
 # + option -t to get the type strain name(s) for each selected species #
 # #
@@ -210,20 +214,12 @@ ASSEMBLY_REPORT=ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_$
 
 if [ ! -e $OUTDIR ]; then echo "creating output directory: $OUTDIR" ; mkdir $OUTDIR ; fi
 SUMMARY=$OUTDIR/summary.txt;
-t=$(( $(date +%s%N) / 1000000 ));
-while [ 1 ]
-do
- wget $WGETOPT -O - ftp://$ASSEMBLY_REPORT | sed -n '2p' ;
- [ $? == 0 ] && break || sleep 1 ;
-done > $SUMMARY
-f=$(( $(date +%s%N) / 1000000 - $t ));
+PROTOCOL="https:";
 while [ 1 ]
 do
- wget $WGETOPT -O - https://$ASSEMBLY_REPORT | sed -n '2p' ;
+ wget $WGETOPT -O - $PROTOCOL"//"$ASSEMBLY_REPORT > $SUMMARY ;
  [ $? == 0 ] && break || sleep 1 ;
-done > $SUMMARY
-h=$(( $(date +%s%N) / 1000000 - $f - $t ));
-[ $h -lt $f ] && PROTOCOL="https:" || PROTOCOL="ftp:";
+done
 
 
 #############################################################################################################
@@ -234,12 +230,10 @@ h=$(( $(date +%s%N) / 1000000 - $f - $t ));
 echo "selection criterion: $INCLUDE_PATTERN" ;
 if [ "$EXCLUDE_PATTERN" != "^#" ]; then echo "exclusion criterion: $EXCLUDE_PATTERN" ; fi
 tmp=$(randomfile $SUMMARY);
-while [ 1 ]
-do
- wget $WGETOPT -O - $PROTOCOL"//"$ASSEMBLY_REPORT | grep -E "$INCLUDE_PATTERN" | grep -v -E "$EXCLUDE_PATTERN" | grep -F "ftp://ftp.ncbi.nlm.nih.gov" > $tmp ;
- if [ $? == 0 ]; then cat $tmp >> $SUMMARY ; rm -f $tmp ; break; fi
- sleep 1 ;
-done
+mv $SUMMARY $tmp ;
+sed -n '2p' $tmp > $SUMMARY ;
+sed '1,2d' $tmp | grep -E "$INCLUDE_PATTERN" | grep -v -E "$EXCLUDE_PATTERN" | grep -F "ftp://ftp.ncbi.nlm.nih.gov" >> $SUMMARY ;
+rm -f $tmp ;
 n=$(grep -v -c "^#" $SUMMARY);
 echo "$REPOSITORY: $n WGS nucleotide sequence FASTA files" ;
 if [ $n -eq 0 ]; then exit 0 ; fi