Commit 896fc987 authored by Rayan CHIKHI
Browse files

robustness increase, also got rid of nonzero vcpu desire

parent 7f26e6c3
......@@ -32,6 +32,7 @@ RUN wget --quiet https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRATOOLKITVERSION}
# https://github.com/ababaian/serratus/blob/5d288765b6e22bf7ba1b69148e0013d65560b968/containers/serratus-dl/Dockerfile#L51
RUN mkdir -p /root/.ncbi
RUN wget -O /root/.ncbi/user-settings.mkfg https://raw.githubusercontent.com/ababaian/serratus/master/containers/serratus-dl/VDB_user-settings.mkfg
RUN sed -i "s/\/root\/ncbi/\/mnt\/serratus-data/g" /root/.ncbi/user-settings.mkfg
RUN vdb-config --report-cloud-identity yes
# https://github.com/ababaian/serratus/blob/5d288765b6e22bf7ba1b69148e0013d65560b968/containers/serratus-dl/serratus-dl.sh#L167
RUN DLID="$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1 )-$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 4 | head -n 1 )-$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 4 | head -n 1 )-$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 4 | head -n 1 )-$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 12 | head -n 1 )" && sed -i "s/52e8a8fe-0cac-4bf2-983a-3617cdba7df5/$DLID/g" /root/.ncbi/user-settings.mkfg
......
......@@ -24,26 +24,31 @@ def process_file(accession, region):
startTime = datetime.now()
os.system(' '.join(["df", "-T"]))
os.system(' '.join(["cat","/dfT.txt"]))
os.system(' '.join(["lsblk"]))
os.system(' '.join(["cat","/lsblk.txt"]))
os.system(' '.join(["echo","contents of /root/.ncbi/user-settings.mkfg"]))
os.system(' '.join(["cat","/root/.ncbi/user-settings.mkfg"]))
os.system(' '.join(["echo","EOF"]))
# go to /tmp (that's where local storage / nvme is)
# go to /data instead (EBS)
# on second thought..
# go to EBS instead
os.chdir("/mnt/serratus-data")
os.system(' '.join(["pwd"]))
# check free space
os.system(' '.join(["df", "-h","."]))
# some debug
public_sra_files = os.listdir(os.getcwd() + "/public/sra/")
print("at start, dir listing of public/sra/", public_sra_files)
# download reads from accession
os.system('mkdir -p out/')
os.system('prefetch '+accession)
os.system('/parallel-fastq-dump --split-files --outdir out/ --threads 4 --sra-id '+accession)
files = os.listdir(os.getcwd() + "/out/")
print("after fastq-dump, dir listing", files)
print("after fastq-dump, dir listing of out/", files)
inputDataFn = accession+".inputdata.txt"
g = open(inputDataFn,"w")
for f in files:
......@@ -66,9 +71,10 @@ def process_file(accession, region):
outputBucket = "serratus-rayan"
s3.upload_file(accession+".fastq", outputBucket, "reads/"+accession+".fastq")
# cleanup. important! otherwise files stay on local drive
# cleanup. #(important when using a local drive)
os.system(' '.join(["rm","-f","out/"+accession+"*.fastq"]))
os.system(' '.join(["rm","-f",accession+".fastq"]))
os.system(' '.join(["rm","-f","public/sra/"+accession+".sra"]))
endTime = datetime.now()
diffTime = endTime - startTime
......
......@@ -136,8 +136,8 @@ Resources:
Type: MANAGED
ComputeResources:
Type: SPOT
MinvCpus: 4
DesiredvCpus: 4
MinvCpus: 0
DesiredvCpus: 0
MaxvCpus: 1000
AllocationStrategy: SPOT_CAPACITY_OPTIMIZED
InstanceTypes:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment