Skip to content
Snippets Groups Projects
Commit b15e537d authored by Karen  DRUART's avatar Karen DRUART
Browse files

Select the main accessions by removing isoform information from the FASTA file.

Strip leading whitespace from each line of the XML file before matching keywords, in case the line begins with spaces.
parent 603b8178
No related branches found
No related tags found
No related merge requests found
...@@ -47,9 +47,6 @@ ...@@ -47,9 +47,6 @@
import re import re
import sys
import getopt
def define_db(database): def define_db(database):
...@@ -107,6 +104,9 @@ if isFasta: ...@@ -107,6 +104,9 @@ if isFasta:
for line in lines: for line in lines:
if line.startswith(">"): if line.startswith(">"):
acc=line.split("|")[1] acc=line.split("|")[1]
if acc.find("-"):
acc=acc.split("-")[0]
print(acc)
l_acc.append(acc) l_acc.append(acc)
print("\n") print("\n")
print("Number of sequence in fasta file: "+str(len(l_acc))+"\n") print("Number of sequence in fasta file: "+str(len(l_acc))+"\n")
...@@ -116,6 +116,8 @@ else: ...@@ -116,6 +116,8 @@ else:
print("\n") print("\n")
print("Number of accessions in file: "+str(len(l_acc))+"\n") print("Number of accessions in file: "+str(len(l_acc))+"\n")
l_acc = list(dict.fromkeys(l_acc))
xmlo=open(options.output_name+".xml","w") xmlo=open(options.output_name+".xml","w")
...@@ -130,7 +132,7 @@ flag_entry=0 ...@@ -130,7 +132,7 @@ flag_entry=0
temp_entry=[] temp_entry=[]
toprint=1 toprint=1
for line in xml_lines: for line in xml_lines:
if line.startswith("<entry"): if line.strip().startswith("<entry"):
acc=[] acc=[]
seq=[] seq=[]
name="" name=""
...@@ -139,21 +141,21 @@ for line in xml_lines: ...@@ -139,21 +141,21 @@ for line in xml_lines:
temp_entry=[line] temp_entry=[line]
flag_entry=1 flag_entry=1
toprint=1 toprint=1
elif line.startswith("<accession"): elif line.strip().startswith("<accession"):
acc.append(re.split(">|<",line)[2]) acc.append(re.split(">|<",line)[2])
temp_entry.append(line) temp_entry.append(line)
elif line.startswith("<sequence"): elif line.strip().startswith("<sequence"):
temp_entry.append(line) temp_entry.append(line)
if line.find("</sequence>"): if line.find("</sequence>"):
seq=re.split(">|<",line)[2] seq=re.split(">|<",line)[2]
elif line.startswith("<name>"): elif line.strip().startswith("<name>"):
temp_entry.append(line) temp_entry.append(line)
name=re.split(">|<",line)[2] name=re.split(">|<",line)[2]
elif line.startswith("<fullName>"): elif line.strip().startswith("<fullName>"):
temp_entry.append(line) temp_entry.append(line)
if fullname=="": if fullname=="":
fullname=re.split(">|<",line)[2] fullname=re.split(">|<",line)[2]
elif line.startswith('</entry'): elif line.strip().startswith('</entry'):
temp_entry.append(line) temp_entry.append(line)
flag_entry=0 flag_entry=0
# we test if we have to keep this proteoform # we test if we have to keep this proteoform
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment