Skip to content
Snippets Groups Projects
Commit 6c6f9846 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

Merge branch 'dev' into 'main'

Fix bug in protein display when multiple nt contigs are given as input

See merge request !16
parents 466e8d30 bf724ffa
No related branches found
No related tags found
1 merge request: !16 Fix bug in protein display when multiple nt contigs are given as input
Pipeline #125612 passed
...@@ -261,11 +261,10 @@ class Analysis(Invocation): ...@@ -261,11 +261,10 @@ class Analysis(Invocation):
queryset = Protein.objects.filter(analysis=self) queryset = Protein.objects.filter(analysis=self)
if self.status == DONE and queryset.count() == 0: if self.status == DONE and queryset.count() == 0:
file_path = self.load_dataset("proteins", "proteins") file_path = self.load_dataset("proteins", "proteins")
proteins = self.read_fasta_file(file_path) proteins = self.read_fasta_file(file_path, isFromNt=True)
if len(proteins) <= 1: if len(proteins) <= 1:
file_path = self.load_dataset("sequences", "proteins") file_path = self.load_dataset("sequences", "proteins")
proteins = self.read_fasta_file(file_path) proteins = self.read_fasta_file(file_path)
# print(proteins)
json_prots = [ProteinEntry(**prot).dict() for prot in proteins] json_prots = [ProteinEntry(**prot).dict() for prot in proteins]
prots = Protein(analysis=self, proteins=json_prots) prots = Protein(analysis=self, proteins=json_prots)
...@@ -283,24 +282,47 @@ class Analysis(Invocation): ...@@ -283,24 +282,47 @@ class Analysis(Invocation):
self.stderr = job.wrapped["tool_stderr"] self.stderr = job.wrapped["tool_stderr"]
self.save() self.save()
def read_fasta_file(self, file_path, isFromNt=False):
    """Parse a protein FASTA file into a list of plain dicts.

    Each record yields ``{"id", "length", "strand"}``; when the record
    description splits on ``" # "`` into exactly five fields, ``start``
    and ``end`` (fields 1 and 2) are added and ``strand`` (field 3) is set
    to ``1`` or ``-1`` if it has one of those values.
    NOTE(review): the five-field header looks like gene-caller output
    (e.g. Prodigal) - confirm against the tool that produces the file.

    Args:
        file_path: Path of the FASTA file, or ``None`` (returns ``[]``).
        isFromNt: When true, the proteins were predicted from nucleotide
            contigs and their coordinates are relative to each contig.
            Coordinates are then shifted so that contigs are laid out one
            after another on a single axis. Proteins are assigned to a
            contig by dropping the trailing ``_<n>`` from their id.

    Returns:
        A list of dicts, one per FASTA record, in file order.
    """
    sequences = []
    if file_path is None:
        return sequences

    with open(file_path) as handle:
        current_contig = None
        # Amount added to the coordinates of the contig being read.
        offset = 0
        # Shifted end of the last positioned protein (already includes
        # the offset of its own contig).
        last_prot_end = 0
        for record in SeqIO.parse(handle, "fasta"):
            prot = {"id": record.id, "length": len(record), "strand": None}

            if isFromNt:
                # Everything before the last "_" identifies the contig.
                # Keeping the original separators avoids merging contigs
                # whose names differ only by "-" versus "_".
                contig = record.id.rpartition("_")[0]
                if contig != current_contig:
                    current_contig = contig
                    # last_prot_end is already an absolute position, so
                    # the new contig starts exactly there; adding it to
                    # the previous offset would count earlier contigs
                    # twice from the third contig onward.
                    offset = last_prot_end

            description_list = record.description.split(" # ")
            if len(description_list) == 5:
                start = int(description_list[1])
                end = int(description_list[2])
                strand = description_list[3]
                if strand in ("1", "-1"):
                    prot["strand"] = int(strand)
                if isFromNt:
                    prot["start"] = offset + start
                    prot["end"] = offset + end
                    last_prot_end = prot["end"]
                else:
                    prot["start"] = start
                    prot["end"] = end

            sequences.append(prot)
    return sequences
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment