diff --git a/README.md b/README.md index aa03c6ce233c94639f06315546b3d6fcdbed077a..1051727b5236000e879d2c543cb7a6f1687e1ff0 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,11 @@ Run _wgetGenBankWGS_ with the following command line model: Run _wgetGenBankWGS_ without option to read the following documentation: ``` - wgetGenBankWGS Copyright (C) 2019-2021 Institut Pasteur + wgetGenBankWGS Copyright (C) 2019-2021 Institut Pasteur - Downloading sequence files corresponding to selected entries from genome assembly report files: - GenBank: ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_genbank.txt - RefSeq: ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt + Downloading sequence files corresponding to selected entries from genome assembly report files (option -d): + GenBank: ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_genbank.txt + RefSeq: ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt Selected entries (options -e and -v) can be restricted to a specific phylum using option -p: -p A archaea diff --git a/wgetGenBankWGS.sh b/wgetGenBankWGS.sh index 479a16b32e6aa67ef0483b48a99b995e68ec82e2..e50eaead29c3975d225362b7009d24e6a6c62bf1 100755 --- a/wgetGenBankWGS.sh +++ b/wgetGenBankWGS.sh @@ -33,7 +33,10 @@ # = VERSIONS = # # ============ # # # - VERSION=0.6.201018ac # + VERSION=0.7.211026ac # +# + takes into account the new protocol https in field ftp_path of the genome assembly report files # +# # +# VERSION=0.6.211018ac # # + takes into account the last field 'asm_not_live_date' in genome assembly report files # # + adding option -p to select a specific phylum # # # @@ -71,11 +74,11 @@ if [ "$1" = "-?" 
] || [ "$1" = "-h" ] || [ $# -le 1 ] then # cat <<EOF - wgetGenBankWGS v.$VERSION Copyright (C) 2019-2021 Institut Pasteur + wgetGenBankWGS v.$VERSION Copyright (C) 2019-2021 Institut Pasteur - Downloading sequence files corresponding to selected entries from genome assembly report files: - GenBank: ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_genbank.txt - RefSeq: ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt + Downloading sequence files corresponding to selected entries from genome assembly report files (option -d): + GenBank: ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_genbank.txt + RefSeq: ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt Selected entries (options -e and -v) can be restricted to a specific phylum using option -p: -p A archaea @@ -151,6 +154,7 @@ fi # =============== # # # # = PROTOCOL can be either "ftp:" or "https"; however, "https:" is generally faster ====================== # +# = note: since Sep. 2021, "https:" is the default protocol ====================== # # # PROTOCOL="https:"; # # @@ -314,7 +318,7 @@ if [ "$EXCLUDE_PATTERN" != "^#" ]; then echo "exclusion criterion: $EXCLUDE_PATT tmp=$(randomfile $SUMMARY); mv $SUMMARY $tmp ; sed -n '2p' $tmp > $SUMMARY ; -sed '1,2d' $tmp | grep -E "$INCLUDE_PATTERN" | grep -v -E "$EXCLUDE_PATTERN" | grep -F "ftp://ftp.ncbi.nlm.nih.gov" >> $SUMMARY ; +sed '1,2d' $tmp | grep -E "$INCLUDE_PATTERN" | grep -v -E "$EXCLUDE_PATTERN" | grep -F "ftp.ncbi.nlm.nih.gov" >> $SUMMARY ; rm $tmp ; n=$(grep -v -c "^#" $SUMMARY); echo "$REPOSITORY: $n entries" ;