Commit 89c7bfc5 authored by Rayan CHIKHI
Browse files

make it delete original fastq file from bucket by default

parent 255bdf18
......@@ -16,7 +16,7 @@ LOGTYPE_ERROR = 'ERROR'
LOGTYPE_INFO = 'INFO'
LOGTYPE_DEBUG = 'DEBUG'
def process_file(inputBucket, fileName, region):
def process_file(inputBucket, fileName, region, delete_original=False):
#try:
if True:
urllib3.disable_warnings()
......@@ -44,6 +44,11 @@ def process_file(inputBucket, fileName, region):
# upload unitigs to s3
s3.upload_file(compressed_unitigs_filename, inputBucket, compressed_unitigs_filename)
# delete original file, maybe
if delete_original:
logMessage(fileName, "Deleting original file", LOGTYPE_INFO)
s3.delete_object(Bucket = inputBucket, Key = fileName)
endTime = datetime.now()
diffTime = endTime - startTime
logMessage(fileName, "File processing time - " + str(diffTime.seconds), LOGTYPE_INFO)
......@@ -55,6 +60,7 @@ def main():
inputBucket = ""
fileName = ""
region = "us-east-1"
delete_original = False
#try:
if "InputBucket" in os.environ:
......@@ -70,18 +76,20 @@ def main():
parser.add_argument("--bucketName", "-b", type=str, required=True)
parser.add_argument("--fileName", "-f", type=str, required=True)
parser.add_argument("--region", "-r", type=str, required=True)
parser.add_argument("--delete-original", "-d", dest='delete_original', action='store_true')
args = parser.parse_args()
inputBucket = args.bucketName
fileName = args.fileName
region = args.region
delete_original = args.delete_original
except Exception as ex:
logMessage(fileName, "Unexpected error during arg parsing (due to lack of environment variables):" + str(ex), LOGTYPE_ERROR)
logMessage(fileName, 'parameters: ' + inputBucket + " " + fileName + " " + region, LOGTYPE_INFO)
process_file(inputBucket, fileName, region)
process_file(inputBucket, fileName, region, delete_original)
def logMessage(fileName, message, logType):
......
......@@ -113,7 +113,7 @@ Resources:
- Ref: AWS::Region
- ".amazonaws.com/aws-batch-s3-unitigs-job:latest"
Vcpus: 2
Memory: 2000
Memory: 4000
Command:
- python
- batch_processor.py
......@@ -162,7 +162,7 @@ Resources:
S3Key:
Rules:
- Name: suffix
Value: '.gz'
Value: '.fastq'
BatchProcessBucketPermission:
......@@ -207,7 +207,7 @@ Resources:
batch = boto3.client('batch')
region = batch.meta.region_name
batchCommand = "--bucketName " + bucketName + " --fileName " + inputFileName + " --region " + region
batchCommand = "--bucketName " + bucketName + " --fileName " + inputFileName + " --region " + region + " --delete-original"
out = "inputFileName - " + bucketName + "/" + inputFileName + " Region " + region
out = out + " " + batchCommand
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment