Skip to content
Snippets Groups Projects
Commit bf724ffa authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

Fix protein display bug when multiple nt contigs are given as input

parent 466e8d30
No related branches found
No related tags found
1 merge request: !16 "Fix protein display bug when multiple nt contigs are given as input"
Pipeline #125611 passed
......@@ -261,11 +261,10 @@ class Analysis(Invocation):
queryset = Protein.objects.filter(analysis=self)
if self.status == DONE and queryset.count() == 0:
file_path = self.load_dataset("proteins", "proteins")
proteins = self.read_fasta_file(file_path)
proteins = self.read_fasta_file(file_path, isFromNt=True)
if len(proteins) <= 1:
file_path = self.load_dataset("sequences", "proteins")
proteins = self.read_fasta_file(file_path)
# print(proteins)
json_prots = [ProteinEntry(**prot).dict() for prot in proteins]
prots = Protein(analysis=self, proteins=json_prots)
......@@ -283,24 +282,47 @@ class Analysis(Invocation):
self.stderr = job.wrapped["tool_stderr"]
self.save()
def read_fasta_file(self, file_path, isFromNt=False):
    """Parse a protein FASTA file into a list of protein dictionaries.

    Each record yields a dict with the keys ``id``, ``length`` and
    ``strand``. When the record description has exactly five fields
    separated by ``" # "`` (id, start, end, strand, attributes --
    presumably Prodigal-style headers; confirm against the producer of
    the file), ``start`` and ``end`` are added and ``strand`` is set to
    ``1`` or ``-1`` when the field holds one of those values.

    Args:
        file_path: Path of the FASTA file, or ``None``.
        isFromNt: When true, the proteins are assumed to come from one or
            more nucleotide contigs. Proteins of the same contig share the
            record id minus its trailing ``_<n>`` suffix. Coordinates of
            every contig after the first are shifted so that all proteins
            lie on a single, non-overlapping axis.

    Returns:
        A list of protein dicts in file order; empty when ``file_path``
        is ``None``.
    """
    sequences = []
    if file_path is None:
        return sequences

    with open(file_path) as handle:
        current_contig = None
        # Shift applied to the coordinates of the contig being read.
        offset = 0
        # Largest shifted end coordinate seen so far (already includes
        # the offset of the contig it belongs to).
        last_prot_end = 0
        for record in SeqIO.parse(handle, "fasta"):
            prot = {"id": record.id, "length": len(record), "strand": None}
            if isFromNt:
                # Drop the trailing "_<n>" protein index to get the contig.
                contig = record.id.rpartition("_")[0]
                if contig != current_contig:
                    current_contig = contig
                    # last_prot_end is already an absolute coordinate, so
                    # it is the new offset as-is; adding it to the previous
                    # offset would count earlier contigs twice.
                    offset = last_prot_end
            description_list = record.description.split(" # ")
            if len(description_list) == 5:
                _, start, end, strand, _ = description_list
                if strand in ("1", "-1"):
                    prot["strand"] = int(strand)
                # offset stays 0 when isFromNt is false, so the raw
                # coordinates are kept in that case.
                prot["start"] = offset + int(start)
                prot["end"] = offset + int(end)
                last_prot_end = max(last_prot_end, prot["end"])
            sequences.append(prot)
    return sequences
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment