Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Rayan CHIKHI
serratus-batch-dl
Commits
3dee182a
Commit
3dee182a
authored
Jun 16, 2020
by
Rayan CHIKHI
Browse files
switched from c5d local storage to an EBS volume
parent
05d642da
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/batch_processor.py
View file @
3dee182a
...
...
@@ -23,8 +23,9 @@ def process_file(accession, region):
print
(
"region - "
+
region
)
startTime
=
datetime
.
now
()
# go to /tmp (important, that's where local storage / nvme is)
os
.
chdir
(
"/tmp"
)
# go to /tmp (that's where local storage / nvme is)
# go to /data instead (EBS)
os
.
chdir
(
"/data"
)
os
.
system
(
' '
.
join
([
"pwd"
]))
# check free space
...
...
@@ -52,11 +53,17 @@ def process_file(accession, region):
if
os
.
stat
(
accession
+
".fastq"
).
st_size
==
0
:
print
(
"fastp produced empty output"
)
exit
(
1
)
print
(
"fastp done, now uploading to S3"
)
# upload filtered reads to s3
outputBucket
=
"serratus-rayan"
s3
.
upload_file
(
accession
+
".fastq"
,
outputBucket
,
"reads/"
+
accession
+
".fastq"
)
# cleanup. important! otherwise files stay on local drive
os
.
system
(
' '
.
join
([
"rm"
,
"-f"
,
"out/"
+
accession
+
"*.fastq"
]))
os
.
system
(
' '
.
join
([
"rm"
,
"-f"
,
accession
+
".fastq"
]))
endTime
=
datetime
.
now
()
diffTime
=
endTime
-
startTime
logMessage
(
accession
,
"Serratus-batch-dl processing time - "
+
str
(
diffTime
.
seconds
),
LOGTYPE_INFO
)
...
...
template/template.yaml
View file @
3dee182a
...
...
@@ -109,15 +109,16 @@ Resources:
- .dkr.ecr.
- Ref
:
AWS::Region
- ".amazonaws.com/serratus-dl-batch-job:latest"
Vcpus
:
2
Memory
:
4
000
Vcpus
:
4
Memory
:
6
000
MountPoints
:
-
ContainerPath
:
/tmp
SourceVolume
:
temp_dir
-
ContainerPath
:
"
/data"
ReadOnly
:
false
SourceVolume
:
data
Volumes
:
-
Host
:
SourcePath
:
/tmp
Name
:
temp_dir
-
Name
:
data
Host
:
SourcePath
:
"
/data"
RetryStrategy
:
Attempts
:
1
RayanSerratusDlBatchProcessingJobQueue
:
...
...
@@ -138,10 +139,9 @@ Resources:
MinvCpus
:
0
DesiredvCpus
:
0
MaxvCpus
:
1000
#
AllocationStrategy: SPOT_CAPACITY_OPTIMIZED
# maybe let's not activate cause i really want a c5d and nothing else
AllocationStrategy
:
SPOT_CAPACITY_OPTIMIZED
InstanceTypes
:
-
c5d
#- optimal
-
optimal
BidPercentage
:
100
SpotIamFleetRole
:
!Ref
SpotIamFleetRole
Subnets
:
...
...
@@ -162,8 +162,12 @@ Resources:
LaunchTemplateName
:
"
Special-inc-nvme-assembly"
LaunchTemplateData
:
BlockDeviceMappings
:
-
DeviceName
:
/dev/xvdcz
VirtualName
:
ephemeral0
-
DeviceName
:
'
/dev/sdb'
# Amazon ECS-Optimized Amazon Linux 2 AMI accepts EBS vol /dev/sd[b-l]
Ebs
:
DeleteOnTermination
:
true
Encrypted
:
true
VolumeSize
:
1000
# high because of multitenancy
VolumeType
:
"
gp2"
UserData
:
"
Fn::Base64"
:
!Sub
|
MIME-Version: 1.0
...
...
@@ -171,42 +175,21 @@ Resources:
--==MYBOUNDARY==
Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
# from https://forums.aws.amazon.com/message.jspa?messageID=867011
yum install -y rsync
# mount the ephemeral storage
mkfs.ext4 /dev/nvme1n1
mount -t ext4 /dev/nvme1n1 /mnt/
# make temp directory for containers usage
# should be used in the Batch job definition (MountPoints)
mkdir /mnt/tmp_ext
rsync -avPHSX /tmp/ /mnt/tmp_ext/
# modify fstab to mount /tmp on the new storage.
sed -i '$ a /mnt/tmp_ext /tmp none bind 0 0' /etc/fstab
#!/bin/bash
mkfs.ext4 /dev/sdb
(
echo n # Add a new partition
echo p # Primary partition
echo 1 # Partition number
echo # First sector (Accept default: 1)
echo # Last sector (Accept default: varies)
echo w # Write changes
) | fdisk /dev/sdb
mkfs.ext4 /dev/xvdb1
mkdir /data
echo " /dev/xvdb1 /data ext4 defaults 0 2" >> /etc/fstab
mount -a
# make /tmp usable by everyone
chmod 777 /mnt/tmp_ext
# Rayan: this makes the local drive run out of space, so I removed it and let's see
#
#service docker stop
## copy the docker directory to the ephemeral storage
#rsync -avPHSX /var/lib/docker/ /mnt/docker_ext/
## set the data directory to the ephemeral storage in the config file of the docker daemon
#DOCKER_CFG_FILE=/etc/docker/daemon.json
#if [ ! -e "${!DOCKER_CFG_FILE}" ]; then
# # need to create a non empty file for sed to work
# echo "{" > ${!DOCKER_CFG_FILE}
#else
# # replace the last } of the file by a ,
# sed -i s/}$/,/ ${!DOCKER_CFG_FILE}
#fi
#sed -i '$ a "data-root": "/mnt/docker_ext/"' ${!DOCKER_CFG_FILE}
#sed -i '$ a }' ${!DOCKER_CFG_FILE}
#service docker start
service docker restart
--==MYBOUNDARY==--
Outputs
:
...
...
template/template_c5d.yaml
0 → 100644
View file @
3dee182a
---
AWSTemplateFormatVersion
:
'
2010-09-09'
Description
:
'
Orchestrating
an
Application
Process
with
AWS
Batch
using
CloudFormation'
Resources
:
VPC
:
Type
:
AWS::EC2::VPC
Properties
:
CidrBlock
:
10.0.0.0/16
InternetGateway
:
Type
:
AWS::EC2::InternetGateway
RouteTable
:
Type
:
AWS::EC2::RouteTable
Properties
:
VpcId
:
Ref
:
VPC
VPCGatewayAttachment
:
Type
:
AWS::EC2::VPCGatewayAttachment
Properties
:
VpcId
:
Ref
:
VPC
InternetGatewayId
:
Ref
:
InternetGateway
SecurityGroup
:
Type
:
AWS::EC2::SecurityGroup
Properties
:
GroupDescription
:
EC2 Security Group for instances launched in the VPC by Batch
VpcId
:
Ref
:
VPC
Subnet
:
Type
:
AWS::EC2::Subnet
Properties
:
CidrBlock
:
10.0.0.0/24
VpcId
:
Ref
:
VPC
MapPublicIpOnLaunch
:
'
True'
Route
:
Type
:
AWS::EC2::Route
Properties
:
RouteTableId
:
Ref
:
RouteTable
DestinationCidrBlock
:
0.0.0.0/0
GatewayId
:
Ref
:
InternetGateway
SubnetRouteTableAssociation
:
Type
:
AWS::EC2::SubnetRouteTableAssociation
Properties
:
RouteTableId
:
Ref
:
RouteTable
SubnetId
:
Ref
:
Subnet
BatchServiceRole
:
Type
:
AWS::IAM::Role
Properties
:
AssumeRolePolicyDocument
:
Version
:
'
2012-10-17'
Statement
:
-
Effect
:
Allow
Principal
:
Service
:
batch.amazonaws.com
Action
:
sts:AssumeRole
ManagedPolicyArns
:
-
arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole
IamInstanceProfile
:
Type
:
AWS::IAM::InstanceProfile
Properties
:
Roles
:
-
Ref
:
EcsInstanceRole
EcsInstanceRole
:
Type
:
AWS::IAM::Role
Properties
:
AssumeRolePolicyDocument
:
Version
:
'
2008-10-17'
Statement
:
-
Sid
:
'
'
Effect
:
Allow
Principal
:
Service
:
ec2.amazonaws.com
Action
:
sts:AssumeRole
ManagedPolicyArns
:
-
arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
-
arn:aws:iam::aws:policy/AmazonS3FullAccess
SpotIamFleetRole
:
# taken from https://github.com/aodn/aws-wps/blob/master/wps-cloudformation-template.yaml
Type
:
AWS::IAM::Role
Properties
:
AssumeRolePolicyDocument
:
Version
:
2012-10-17
Statement
:
-
Effect
:
Allow
Principal
:
Service
:
spot.amazonaws.com
Action
:
sts:AssumeRole
-
Effect
:
Allow
Principal
:
Service
:
spotfleet.amazonaws.com
Action
:
sts:AssumeRole
ManagedPolicyArns
:
-
arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole
RayanSerratusDlBatchProcessingJobDefinition
:
Type
:
AWS::Batch::JobDefinition
Properties
:
Type
:
container
JobDefinitionName
:
RayanSerratusDlBatchJobDefinition
ContainerProperties
:
Image
:
Fn::Join:
- ''
- - Ref
:
AWS::AccountId
- .dkr.ecr.
- Ref
:
AWS::Region
- ".amazonaws.com/serratus-dl-batch-job:latest"
# rationale: each c5d instance has 100 local GB per 4 CPU, and that's roughly the size we need
Vcpus
:
4
Memory
:
4000
MountPoints
:
-
ContainerPath
:
/tmp
SourceVolume
:
temp_dir
Volumes
:
-
Host
:
SourcePath
:
/tmp
Name
:
temp_dir
RetryStrategy
:
Attempts
:
1
RayanSerratusDlBatchProcessingJobQueue
:
Type
:
AWS::Batch::JobQueue
Properties
:
JobQueueName
:
RayanSerratusDlBatchProcessingJobQueue
Priority
:
1
ComputeEnvironmentOrder
:
-
Order
:
1
ComputeEnvironment
:
Ref
:
RayanSerratusDlComputeEnvironment
RayanSerratusDlComputeEnvironment
:
Type
:
AWS::Batch::ComputeEnvironment
Properties
:
Type
:
MANAGED
ComputeResources
:
Type
:
SPOT
MinvCpus
:
0
DesiredvCpus
:
0
MaxvCpus
:
1000
#AllocationStrategy: SPOT_CAPACITY_OPTIMIZED # maybe let's not activate cause i really want a c5d and nothing else
InstanceTypes
:
-
c5d
#- optimal
BidPercentage
:
100
SpotIamFleetRole
:
!Ref
SpotIamFleetRole
Subnets
:
-
Ref
:
Subnet
SecurityGroupIds
:
-
Ref
:
SecurityGroup
InstanceRole
:
Ref
:
IamInstanceProfile
LaunchTemplate
:
LaunchTemplateId
:
!Ref
SpecialComputeLaunchTemplate
Version
:
!GetAtt
SpecialComputeLaunchTemplate.LatestVersionNumber
ServiceRole
:
Ref
:
BatchServiceRole
SpecialComputeLaunchTemplate
:
# https://github.com/vfrank66/awsbatchlaunchtemplate/blob/master/aws-batch-launch-ami.yaml
Type
:
AWS::EC2::LaunchTemplate
Properties
:
LaunchTemplateName
:
"
Special-inc-nvme-assembly"
LaunchTemplateData
:
BlockDeviceMappings
:
-
DeviceName
:
/dev/xvdcz
VirtualName
:
ephemeral0
UserData
:
"
Fn::Base64"
:
!Sub
|
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="
--==MYBOUNDARY==
Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
# from https://forums.aws.amazon.com/message.jspa?messageID=867011
yum install -y rsync
# mount the ephemeral storage
mkfs.ext4 /dev/nvme1n1
mount -t ext4 /dev/nvme1n1 /mnt/
# make temp directory for containers usage
# should be used in the Batch job definition (MountPoints)
mkdir /mnt/tmp_ext
rsync -avPHSX /tmp/ /mnt/tmp_ext/
# modify fstab to mount /tmp on the new storage.
sed -i '$ a /mnt/tmp_ext /tmp none bind 0 0' /etc/fstab
mount -a
# make /tmp usable by everyone
chmod 777 /mnt/tmp_ext
# Rayan: this makes the local drive run out of space, so I removed it and let's see
#
#service docker stop
## copy the docker directory to the ephemeral storage
#rsync -avPHSX /var/lib/docker/ /mnt/docker_ext/
## set the data directory to the ephemeral storage in the config file of the docker daemon
#DOCKER_CFG_FILE=/etc/docker/daemon.json
#if [ ! -e "${!DOCKER_CFG_FILE}" ]; then
# # need to create a non empty file for sed to work
# echo "{" > ${!DOCKER_CFG_FILE}
#else
# # replace the last } of the file by a ,
# sed -i s/}$/,/ ${!DOCKER_CFG_FILE}
#fi
#sed -i '$ a "data-root": "/mnt/docker_ext/"' ${!DOCKER_CFG_FILE}
#sed -i '$ a }' ${!DOCKER_CFG_FILE}
#service docker start
--==MYBOUNDARY==--
Outputs
:
ComputeEnvironmentArn
:
Value
:
Ref
:
RayanSerratusDlComputeEnvironment
BatchProcessingJobQueueArn
:
Value
:
Ref
:
RayanSerratusDlBatchProcessingJobQueue
BatchProcessingJobDefinitionArn
:
Value
:
Ref
:
RayanSerratusDlBatchProcessingJobDefinition
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment