From fe28cbd60f5d55a1c431a512ab10f1ab800ee275 Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Wed, 20 Sep 2023 16:12:10 +0200 Subject: [PATCH] Small speed improvement while extracting proteins --- PanACoTA/align_module/get_seqs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/PanACoTA/align_module/get_seqs.py b/PanACoTA/align_module/get_seqs.py index a8b61df0..b0af4000 100755 --- a/PanACoTA/align_module/get_seqs.py +++ b/PanACoTA/align_module/get_seqs.py @@ -265,11 +265,13 @@ def extract_sequences(to_extract, fasf, files_todo=None, outf=None): previous_fp = None # Extract sequence name - # last_char = line.find(' ') - # if last_char == -1: - # last_char = len(line) - # seq = line[1:last_char].strip() - seq = line.strip().split()[0][1:] + last_char = line.find('\t') + if last_char == -1: + last_char = line.find(' ') + if last_char == -1: + last_char = len(line) + seq = line[1:last_char].strip() + # seq = line.strip().split()[0][1:] # Seq is part of sequences to extract if seq in to_extract: -- GitLab