From a230b47b25e31a0681ae574ba2c7b23bc76cea8a Mon Sep 17 00:00:00 2001
From: Veronique Legrand <vlegrand@pasteur.fr>
Date: Tue, 18 Mar 2025 17:17:39 +0100
Subject: [PATCH] work in progress: better handling of checkpoints

---
 phagetermvirome/functions_PhageTerm.py |  7 ++++++-
 phagetermvirome/readsCoverage_res.py   | 12 ++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/phagetermvirome/functions_PhageTerm.py b/phagetermvirome/functions_PhageTerm.py
index 79a11dd..c9e9b82 100644
--- a/phagetermvirome/functions_PhageTerm.py
+++ b/phagetermvirome/functions_PhageTerm.py
@@ -118,7 +118,12 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta
 
     p_res=chk_handler.load(core_id,idx_refseq)
     gen_len,host_len,termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,\
-    host_whole_coverage, list_hybrid, insert, paired_missmatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq)
+    host_whole_coverage, list_hybrid, insert, paired_mismatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq)
+    if p_res==None:
+        # No checkpoint exists yet: processing of a new sequence is starting, so create an initial checkpoint.
+        chk_handler.start(count_line,core_id,idx_refseq,termini_coverage,whole_coverage,paired_whole_coverage,\
+                 phage_hybrid_coverage, host_hybrid_coverage, \
+                 host_whole_coverage,list_hybrid,insert,paired_mismatch,count_line,read_match)
     if logger!=None:
         logger.add_rw(p_res)
     test_read_seq = match = 0
diff --git a/phagetermvirome/readsCoverage_res.py b/phagetermvirome/readsCoverage_res.py
index ba94792..74f317a 100644
--- a/phagetermvirome/readsCoverage_res.py
+++ b/phagetermvirome/readsCoverage_res.py
@@ -291,6 +291,18 @@ class RCCheckpoint_handler:
                  host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match)
             chkp.save(self.dir_chk,core_id,idx_seq)
 
+    # When running on a cluster, ptv may be killed by a timeout. This can happen at a point where the processing of sequence n-1
+    # is over (its checkpoint no longer exists) and ptv has not yet created a checkpoint for sequence n.
+    # The following method creates a checkpoint at the very beginning of the processing of sequence n, so that ptv
+    # does not have to restart from the beginning if it is killed by a timeout.
+    def start(self,count_line,core_id,idx_seq,termini_coverage,whole_coverage,paired_whole_coverage,\
+                 phage_hybrid_coverage, host_hybrid_coverage, \
+                 host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match):
+        if self.chk_freq != 0 and self.test_mode == False:  # nothing is saved when chk_freq is 0 or in test mode
+            chkp = RCCheckpoint(count_line, core_id, idx_seq, termini_coverage, whole_coverage, paired_whole_coverage, \
+                            phage_hybrid_coverage, host_hybrid_coverage, \
+                            host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested, read_match)
+            chkp.save(self.dir_chk, core_id, idx_seq)  # same save call as the checkpoint code just above in this class
 
     def end(self,core_id):
         if (self.test_mode==False and self.chk_freq!=0) :
-- 
GitLab