From 72cd4383660aa41fdbb2ea85488df5d8080d4906 Mon Sep 17 00:00:00 2001
From: Amandine PERRIN <amandine.perrin@pasteur.fr>
Date: Mon, 3 Jul 2023 11:48:01 +0200
Subject: [PATCH] Change protein name extraction method

---
 PanACoTA/align_module/get_seqs.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/PanACoTA/align_module/get_seqs.py b/PanACoTA/align_module/get_seqs.py
index d6d347da..a8b61df0 100755
--- a/PanACoTA/align_module/get_seqs.py
+++ b/PanACoTA/align_module/get_seqs.py
@@ -265,10 +265,11 @@ def extract_sequences(to_extract, fasf, files_todo=None, outf=None):
             previous_fp = None
 
             # Extract sequence name
-            last_char = line.find(' ')
-            if last_char == -1:
-                last_char = len(line)
-            seq = line[1:last_char].strip()
+            # last_char = line.find(' ')
+            # if last_char == -1:
+            #     last_char = len(line)
+            # seq = line[1:last_char].strip()
+            seq = line.strip().split()[0][1:]
 
             # Seq is part of sequences to extract
             if seq in to_extract:
-- 
GitLab