diff --git a/phagetermvirome/functions_PhageTerm.py b/phagetermvirome/functions_PhageTerm.py index 0c1a33062ad9bab20601d3aa5ecd66302ebe7f13..c16fdd11d9cefd8b53709473410821a010ecfc7a 100644 --- a/phagetermvirome/functions_PhageTerm.py +++ b/phagetermvirome/functions_PhageTerm.py @@ -78,7 +78,7 @@ def chunks(l, n): def init_ws(p_res,refseq,hostseq): gen_len = len(refseq) host_len = len(hostseq) - k = count_line = 0 + cnt_reads_processed = count_line = 0 if p_res==None: termini_coverage = np.array([gen_len*[0], gen_len*[0]]) whole_coverage = np.array([gen_len*[0], gen_len*[0]]) @@ -170,7 +170,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta ### Match sense + (multiple, random pick one) #print("read[:fParms.seed]=",read[:fParms.seed]) - matchPplus_start, matchPplus_end = applyCoverage(read[:fParms.seed], refseq,1) + matchPplus_start, matchPplus_end = applyCoverage(read[:fParms.seed], refseq,fParms.allow_1mismatch) ## Phage if matchPplus_start != -1: @@ -187,7 +187,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta # Paired-read if inRawDArgs.paired != "": - matchPplus_start_paired, matchPplus_end_paired = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], refseq,1) + matchPplus_start_paired, matchPplus_end_paired = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], refseq, fParms.allow_1mismatch) insert_length = matchPplus_end_paired - matchPplus_start if insert_length > 0 and insert_length < fParms.insert_max: position_end = matchPplus_end_paired @@ -196,13 +196,13 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta paired_missmatch += 1 # Paired hybrid if inDArgs.hostseq != "" and matchPplus_start_paired == -1: - matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq,1) + matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHplus_start != -1: list_hybrid[0] += 1 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) else: - matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[:fParms.seed], inDArgs.hostseq,1) + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHminus_start != -1: list_hybrid[0] += 1 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) @@ -210,15 +210,15 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta # Single hybrid elif inDArgs.hostseq != "": - matchPFplus_start, matchPFplus_end = applyCoverage(read[-fParms.seed:], refseq,1) + matchPFplus_start, matchPFplus_end = applyCoverage(read[-fParms.seed:], refseq, fParms.allow_1mismatch) if matchPFplus_start == -1: - matchHplus_start, matchHplus_end = applyCoverage(read[-fParms.seed:], inDArgs.hostseq,1) + matchHplus_start, matchHplus_end = applyCoverage(read[-fParms.seed:], inDArgs.hosts,fParms.allow_1mismatch) if matchHplus_start != -1: list_hybrid[0] += 1 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) else: - matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq,1) + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq,fParms.allow_1mismatch) if matchHminus_start != -1: list_hybrid[0] += 1 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) @@ -230,7 +230,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta ### if no match sense +, then test sense - if not match: - matchPminus_start, matchPminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], refseq,1) + matchPminus_start, matchPminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], refseq,fParms.allow_1mismatch) ## Phage if matchPminus_end != -1: @@ -247,7 +247,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta # Paired-read if inRawDArgs.paired != "": - matchPminus_start_paired, matchPminus_end_paired = applyCoverage(read_paired[:fParms.seed], refseq,1) + matchPminus_start_paired, matchPminus_end_paired = applyCoverage(read_paired[:fParms.seed], refseq,fParms.allow_1mismatch) insert_length = matchPminus_end - matchPminus_start_paired if insert_length > 0 and insert_length < fParms.insert_max: position_start = matchPminus_start_paired @@ -256,14 +256,14 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta paired_missmatch += 1 # Paired hybrid if inDArgs.hostseq != "" and matchPminus_start_paired == -1: - matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq,1) + matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHplus_start != -1: list_hybrid[1] += 1 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) else: - matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], inDArgs.hostseq,1) + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], inDArgs.hostseq,fParms.allow_1mismatch) if matchHminus_start != -1: list_hybrid[1] += 1 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) @@ -271,15 +271,15 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta # Single hybrid elif inDArgs.hostseq != "": - matchPRplus_start, matchPRplus_end = applyCoverage(reverseComplement(read)[:fParms.seed], refseq,1) + matchPRplus_start, matchPRplus_end = applyCoverage(reverseComplement(read)[:fParms.seed], refseq,fParms.allow_1mismatch) if matchPRplus_start == -1: - matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq,1) + matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHplus_start != -1: list_hybrid[1] += 1 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) else: - matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[:fParms.seed], inDArgs.hostseq,1) + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHminus_start != -1: list_hybrid[1] += 1 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) @@ -291,12 +291,12 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta ### if no match on Phage, test Host if not match: - matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq,1) + matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq,fParms.allow_1mismatch) if matchHplus_start != -1: for i in range(matchHplus_start, min(host_len,matchHplus_end+corlen)): host_whole_coverage[0][i]+=1 else: - matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq,1) + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq,fParms.allow_1mismatch) if matchHminus_end != -1: for i in range(max(0,matchHminus_start-corlen), matchHminus_end): host_whole_coverage[1][i]+=1 @@ -351,7 +351,7 @@ def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_sta host_hybrid_coverage = correctEdge(host_hybrid_coverage, fParms.edge) if return_dict!=None and tParms.dir_cov_mm==None: - return_dict[core_id] = [termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage, list_hybrid, np.array(insert), paired_missmatch, k] + return_dict[core_id] = [termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage, list_hybrid, np.array(insert), paired_missmatch, cnt_reads_processed] elif return_dict==None and tParms.dir_cov_mm!=None: insert = np.array(insert) fic_name = os.path.join(tParms.dir_cov_mm, "coverage" + str(idx_refseq) + "_" + str(core_id)) diff --git a/phagetermvirome/main_utils.py b/phagetermvirome/main_utils.py index 042d4d0adaafe5c461d2829e3a995ef9bf5f4ef3..e17328cb8517746c00d2cb0ce16a34b232cf8a9a 100755 --- a/phagetermvirome/main_utils.py +++ b/phagetermvirome/main_utils.py @@ -250,7 +250,8 @@ class inputRawDataArgs: # # Here gather user input parameters and global variable that define how the data will be processed from a functionnal point of view (ex: seed length...) class functionalParms: - def __init__(self,seed,surround,mean,limit,virome,test): + def __init__(self,seed,surround,mean,limit,virome,test,allow_1mismatch=1): + self.allow_1mismatch=allow_1mismatch # experimental stuff, allowing or not 1 mismatch in the seed matching may become a ptv option later. if seed == None: seed = 20 if seed < 15: diff --git a/unit-tests/Data4Test.py b/unit-tests/Data4Test.py index 20ebf2a53283b50477383838de5effdfdad3e77e..810fa8cd8e9b6c070d99efcaa15a9801aa34b27b 100755 --- a/unit-tests/Data4Test.py +++ b/unit-tests/Data4Test.py @@ -14,7 +14,8 @@ class Data4Test: self.refseq_list,name,rej=genomeFastaRecovery(fasta, 500, edge, host_test = 0) self.fParms=functionalParms(seed, None, None, limit_coverage, virome, None) self.fParms.edge = edge - self.fParmsinsert_max = insert_max + self.fParms.insert_max = insert_max + self.fParms.allow_1mismatch=0 self.inDArgs=InputDerivedDataArgs(self.inDRawArgs,self.fParms) if hostseq!="": # expect name of fasta file containing hostseq hs,name,rej=genomeFastaRecovery(fasta, 500, edge, host_test=0)