Skip to content
Snippets Groups Projects

Fix bug in protein display when multiple nt contigs are given as input

Merged Remi PLANEL requested to merge dev into main
1 file
+ 28
6
Compare changes
  • Side-by-side
  • Inline
+ 28
6
@@ -261,11 +261,10 @@ class Analysis(Invocation):
@@ -261,11 +261,10 @@ class Analysis(Invocation):
queryset = Protein.objects.filter(analysis=self)
queryset = Protein.objects.filter(analysis=self)
if self.status == DONE and queryset.count() == 0:
if self.status == DONE and queryset.count() == 0:
file_path = self.load_dataset("proteins", "proteins")
file_path = self.load_dataset("proteins", "proteins")
proteins = self.read_fasta_file(file_path)
proteins = self.read_fasta_file(file_path, isFromNt=True)
if len(proteins) <= 1:
if len(proteins) <= 1:
file_path = self.load_dataset("sequences", "proteins")
file_path = self.load_dataset("sequences", "proteins")
proteins = self.read_fasta_file(file_path)
proteins = self.read_fasta_file(file_path)
# print(proteins)
json_prots = [ProteinEntry(**prot).dict() for prot in proteins]
json_prots = [ProteinEntry(**prot).dict() for prot in proteins]
prots = Protein(analysis=self, proteins=json_prots)
prots = Protein(analysis=self, proteins=json_prots)
@@ -283,24 +282,47 @@ class Analysis(Invocation):
@@ -283,24 +282,47 @@ class Analysis(Invocation):
self.stderr = job.wrapped["tool_stderr"]
self.stderr = job.wrapped["tool_stderr"]
self.save()
self.save()
def read_fasta_file(self, file_path, isFromNt=False):
    """Parse a FASTA file into a list of protein description dicts.

    Each record yields a dict with the keys ``id``, ``length`` and
    ``strand``.  When the record description splits into exactly five
    `` # ``-separated fields, fields 1-3 are read as start, end and strand,
    and ``start`` / ``end`` are added to the dict (presumably a
    Prodigal-style header -- TODO confirm against the producing tool).

    Args:
        file_path: Path of the FASTA file, or ``None``.
        isFromNt: When true, the proteins are assumed to have been predicted
            from nucleotide contigs.  Coordinates restart for every contig,
            so they are shifted so that the contigs are laid out one after
            another on a single axis.

    Returns:
        A list of dicts, one per record, in file order.  An empty list when
        ``file_path`` is ``None``.
    """
    sequences = []
    if file_path is None:
        return sequences

    with open(file_path) as handle:
        current_contig = None
        # Shift applied to the coordinates of the contig being read.
        offset = 0
        # Furthest (already shifted) protein end seen so far; it becomes the
        # offset of the next contig.
        furthest_end = 0
        for record in SeqIO.parse(handle, "fasta"):
            prot = {"id": record.id, "length": len(record), "strand": None}

            if isFromNt:
                # Proteins of the same contig share the id once the trailing
                # "_<number>" is removed.  rpartition keeps the rest of the
                # id untouched and yields "" when there is no underscore.
                contig = record.id.rpartition("_")[0]
                if current_contig is None or contig != current_contig:
                    current_contig = contig
                    # furthest_end is an absolute coordinate, so it must
                    # replace the offset rather than be added to it.
                    offset = furthest_end

            description_list = record.description.split(" # ")
            if len(description_list) == 5:
                start, end, strand = description_list[1:4]
                if strand in ("1", "-1"):
                    prot["strand"] = int(strand)
                prot["start"] = int(start)
                prot["end"] = int(end)
                if isFromNt:
                    prot["start"] += offset
                    prot["end"] += offset
                    # max() guards against a nested/overlapping protein that
                    # ends before the previous one.
                    furthest_end = max(furthest_end, prot["end"])
            sequences.append(prot)
    return sequences
Loading