From a230b47b25e31a0681ae574ba2c7b23bc76cea8a Mon Sep 17 00:00:00 2001 From: Veronique Legrand <vlegrand@pasteur.fr> Date: Tue, 18 Mar 2025 17:17:39 +0100 Subject: [PATCH] work in progress: better handling of checkpoints --- phagetermvirome/functions_PhageTerm.py | 7 ++++++- phagetermvirome/readsCoverage_res.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/phagetermvirome/functions_PhageTerm.py b/phagetermvirome/functions_PhageTerm.py index 79a11dd..c9e9b82 100644 --- a/phagetermvirome/functions_PhageTerm.py +++ b/phagetermvirome/functions_PhageTerm.py @@ -118,7 +118,12 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta p_res=chk_handler.load(core_id,idx_refseq) gen_len,host_len,termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,\ - host_whole_coverage, list_hybrid, insert, paired_missmatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq) + host_whole_coverage, list_hybrid, insert, paired_mismatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq) + if p_res==None: + # no existing checkpoint and starting processing of a new sequence + chk_handler.start(count_line,core_id,idx_refseq,termini_coverage,whole_coverage,paired_whole_coverage,\ + phage_hybrid_coverage, host_hybrid_coverage, \ + host_whole_coverage,list_hybrid,insert,paired_mismatch,count_line,read_match) if logger!=None: logger.add_rw(p_res) test_read_seq = match = 0 diff --git a/phagetermvirome/readsCoverage_res.py b/phagetermvirome/readsCoverage_res.py index ba94792..74f317a 100644 --- a/phagetermvirome/readsCoverage_res.py +++ b/phagetermvirome/readsCoverage_res.py @@ -291,6 +291,18 @@ class RCCheckpoint_handler: host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match) chkp.save(self.dir_chk,core_id,idx_seq) + # When running on a cluster, ptv may be killed due to timeout. 
In that case, it is possible that the processing of sequence n-1 is over + # (there is no more checkpoint for n-1) and ptv has not yet created a checkpoint for sequence n. + # The following method saves a checkpoint at the beginning of the processing of sequence n (only when chk_freq != 0 and test_mode is False), so that ptv does not + # have to restart from the beginning if it is killed due to a timeout. + def start(self,count_line,core_id,idx_seq,termini_coverage,whole_coverage,paired_whole_coverage,\ + phage_hybrid_coverage, host_hybrid_coverage, \ + host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match): + if self.chk_freq != 0 and self.test_mode == False: + chkp = RCCheckpoint(count_line, core_id, idx_seq, termini_coverage, whole_coverage, paired_whole_coverage, \ + phage_hybrid_coverage, host_hybrid_coverage, \ + host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested, read_match) + chkp.save(self.dir_chk, core_id, idx_seq) def end(self,core_id): if (self.test_mode==False and self.chk_freq!=0) : -- GitLab