From fe28cbd60f5d55a1c431a512ab10f1ab800ee275 Mon Sep 17 00:00:00 2001
From: Amandine PERRIN <amandine.perrin@pasteur.fr>
Date: Wed, 20 Sep 2023 16:12:10 +0200
Subject: [PATCH] Small speed improvement while extracting proteins

---
 PanACoTA/align_module/get_seqs.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/PanACoTA/align_module/get_seqs.py b/PanACoTA/align_module/get_seqs.py
index a8b61df0..b0af4000 100755
--- a/PanACoTA/align_module/get_seqs.py
+++ b/PanACoTA/align_module/get_seqs.py
@@ -265,11 +265,13 @@ def extract_sequences(to_extract, fasf, files_todo=None, outf=None):
             previous_fp = None
 
             # Extract sequence name
-            # last_char = line.find(' ')
-            # if last_char == -1:
-            #     last_char = len(line)
-            # seq = line[1:last_char].strip()
-            seq = line.strip().split()[0][1:]
+            last_char = line.find('\t')
+            if last_char == -1:
+                last_char = line.find(' ')
+                if last_char == -1:            
+                    last_char = len(line)
+            seq = line[1:last_char].strip()
+            # seq = line.strip().split()[0][1:]
 
             # Seq is part of sequences to extract
             if seq in to_extract:
-- 
GitLab