Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Rayan CHIKHI
serratus-batch-dl
Commits
896fc987
Commit
896fc987
authored
Jun 16, 2020
by
Rayan CHIKHI
Browse files
robustness increase, also got rid of nonzero vcpu desire
parent
7f26e6c3
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/Dockerfile
View file @
896fc987
...
...
@@ -32,6 +32,7 @@ RUN wget --quiet https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRATOOLKITVERSION}
# https://github.com/ababaian/serratus/blob/5d288765b6e22bf7ba1b69148e0013d65560b968/containers/serratus-dl/Dockerfile#L51
RUN
mkdir
-p
/root/.ncbi
RUN
wget
-O
/root/.ncbi/user-settings.mkfg https://raw.githubusercontent.com/ababaian/serratus/master/containers/serratus-dl/VDB_user-settings.mkfg
RUN
sed
-i
"s/
\/
root
\/
ncbi/
\/
mnt
\/
serratus-data/g"
/root/.ncbi/user-settings.mkfg
RUN
vdb-config
--report-cloud-identity
yes
# https://github.com/ababaian/serratus/blob/5d288765b6e22bf7ba1b69148e0013d65560b968/containers/serratus-dl/serratus-dl.sh#L167
RUN
DLID
=
"
$(
cat
/dev/urandom |
tr
-dc
'a-z0-9'
|
fold
-w
8 |
head
-n
1
)
-
$(
cat
/dev/urandom |
tr
-dc
'a-z0-9'
|
fold
-w
4 |
head
-n
1
)
-
$(
cat
/dev/urandom |
tr
-dc
'a-z0-9'
|
fold
-w
4 |
head
-n
1
)
-
$(
cat
/dev/urandom |
tr
-dc
'a-z0-9'
|
fold
-w
4 |
head
-n
1
)
-
$(
cat
/dev/urandom |
tr
-dc
'a-z0-9'
|
fold
-w
12 |
head
-n
1
)
"
&&
sed
-i
"s/52e8a8fe-0cac-4bf2-983a-3617cdba7df5/
$DLID
/g"
/root/.ncbi/user-settings.mkfg
...
...
src/batch_processor.py
View file @
896fc987
...
...
@@ -24,26 +24,31 @@ def process_file(accession, region):
startTime
=
datetime
.
now
()
os
.
system
(
' '
.
join
([
"df"
,
"-T"
]))
os
.
system
(
' '
.
join
([
"cat"
,
"/dfT.txt"
]))
os
.
system
(
' '
.
join
([
"lsblk"
]))
os
.
system
(
' '
.
join
([
"cat"
,
"/lsblk.txt"
]))
os
.
system
(
' '
.
join
([
"echo"
,
"contents of /root/.ncbi/user-settings.mkfg"
]))
os
.
system
(
' '
.
join
([
"cat"
,
"/root/.ncbi/user-settings.mkfg"
]))
os
.
system
(
' '
.
join
([
"echo"
,
"EOF"
]))
# go to /tmp (that's where local storage / nvme is)
# go to /data instead (EBS)
# on second thought..
# go to EBS instead
os
.
chdir
(
"/mnt/serratus-data"
)
os
.
system
(
' '
.
join
([
"pwd"
]))
# check free space
os
.
system
(
' '
.
join
([
"df"
,
"-h"
,
"."
]))
# some debug
public_sra_files
=
os
.
listdir
(
os
.
getcwd
()
+
"/public/sra/"
)
print
(
"at start, dir listing of public/sra/"
,
public_sra_files
)
# download reads from accession
os
.
system
(
'mkdir -p out/'
)
os
.
system
(
'prefetch '
+
accession
)
os
.
system
(
'/parallel-fastq-dump --split-files --outdir out/ --threads 4 --sra-id '
+
accession
)
files
=
os
.
listdir
(
os
.
getcwd
()
+
"/out/"
)
print
(
"after fastq-dump, dir listing"
,
files
)
print
(
"after fastq-dump, dir listing
of out/
"
,
files
)
inputDataFn
=
accession
+
".inputdata.txt"
g
=
open
(
inputDataFn
,
"w"
)
for
f
in
files
:
...
...
@@ -66,9 +71,10 @@ def process_file(accession, region):
outputBucket
=
"serratus-rayan"
s3
.
upload_file
(
accession
+
".fastq"
,
outputBucket
,
"reads/"
+
accession
+
".fastq"
)
# cleanup. important
! otherwise files stay on
local drive
# cleanup.
#(
important
when using a
local drive
)
os
.
system
(
' '
.
join
([
"rm"
,
"-f"
,
"out/"
+
accession
+
"*.fastq"
]))
os
.
system
(
' '
.
join
([
"rm"
,
"-f"
,
accession
+
".fastq"
]))
os
.
system
(
' '
.
join
([
"rm"
,
"-f"
,
"public/sra/"
+
accession
+
".sra"
]))
endTime
=
datetime
.
now
()
diffTime
=
endTime
-
startTime
...
...
template/template.yaml
View file @
896fc987
...
...
@@ -136,8 +136,8 @@ Resources:
Type
:
MANAGED
ComputeResources
:
Type
:
SPOT
MinvCpus
:
4
DesiredvCpus
:
4
MinvCpus
:
0
DesiredvCpus
:
0
MaxvCpus
:
1000
AllocationStrategy
:
SPOT_CAPACITY_OPTIMIZED
InstanceTypes
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment