From a604f7828fee1d7ec9e384129b27d3fce29d4e0e Mon Sep 17 00:00:00 2001
From: Maximilian Press <max@phasegenomics.com>
Date: Fri, 2 Apr 2021 15:51:08 -0700
Subject: [PATCH] Small changes fixing PE coverage

I am trying to identify a minimal set of code changes that correct outputs in the statistics.csv file. I think it might be the following:

1. apply utilities.correctEdge() to paired_whole_coverage array inside of readsCoverage().

2. Make statistics.csv report _always_ write out paired_whole_coverage rather than whole_coverage. They should be equivalent in single-end case, and whole_coverage is simply wrong in PE case. So just use paired_whole_coverage always, don't bother testing for it.

Trivially, (2) might fix the problem by just making PT report the coverage that it is actually using, if the conditional there is causing a problem. But since the statistics.csv file is our source of truth, it looks very wrong. Maybe this will fix it.
---
 _modules/functions_PhageTerm.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/_modules/functions_PhageTerm.py b/_modules/functions_PhageTerm.py
index 63db8b3..f158300 100644
--- a/_modules/functions_PhageTerm.py
+++ b/_modules/functions_PhageTerm.py
@@ -340,7 +340,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta
         print("WARNING: No read Match, please check your fastq file")
     
     termini_coverage       = correctEdge(termini_coverage, fParms.edge)
-    #paired_whole_coverage = correctEdge(whole_coverage, fParms.edge) #TODO: discuss with Julian and Max about the PE issue that Max reported.
+    paired_whole_coverage = correctEdge(whole_coverage, fParms.edge) #TODO: discuss with Julian and Max about the PE issue that Max reported.
     whole_coverage         = correctEdge(whole_coverage, fParms.edge)
     phage_hybrid_coverage = correctEdge(phage_hybrid_coverage, fParms.edge)
     if inDArgs.hostseq != "":
@@ -694,19 +694,15 @@ def ExportStatistics(phagename, whole_coverage, paired_whole_coverage, termini_c
     export = pd.DataFrame()
     # ORGANIZE Column
     export["Position"]             = list(phage_plus_norm.sort_values("Position")["Position"])
-    if paired != "":
-        export["Coverage +"]       = paired_whole_coverage[0]
-    else:
-        export["Coverage +"]       = whole_coverage[0]
+    # coverage values for whole_coverage == paired_whole_coverage in the single-end case
+    export["Coverage +"]       = paired_whole_coverage[0]
     export["SPC +"]                = termini_coverage[0]
     export["T +"]                  = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])]
     export["T + (close)"]          = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC"])]
     export["pvalue +"]             = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma"])]
     export["padj +"]               = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma_adj"])]
-    if paired != "":
-        export["Coverage -"]       = whole_coverage[1]
-    else:
-        export["Coverage -"]       = paired_whole_coverage[1]
+    # coverage values for whole_coverage == paired_whole_coverage in the single-end case
+    export["Coverage -"]       = paired_whole_coverage[1]
     export["SPC -"]                = termini_coverage[1]
     export["T -"]                  = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])]
     export["T - (close)"]          = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])]
-- 
GitLab