multiple_fasta_reader.py 1.18 KB
Newer Older
1
2
from collections import namedtuple 

3
# Record type for one FASTA entry: header id, header comment, residues.
Sequence = namedtuple("Sequence", "id comment sequence")


def fasta_reader(fasta_path):
    """
    Parse a (multi-)FASTA file and return every record it contains.

    A record starts on a line beginning with '>'. The first
    whitespace-delimited token of that line (leading '>' included) is the
    id, the remaining tokens joined by single spaces are the comment.
    Every following line, up to the next header, is stripped and
    concatenated to build the sequence.
    Lines found before the first header do not belong to any record and
    are ignored.

    :param fasta_path: the path to the file to parse
    :type fasta_path: string
    :return: the list of sequences read from the file, in file order
             (empty if the file contains no header line)
    :rtype: list of Sequence
    """
    sequences = []
    id_ = None  # stays None until the first header line has been seen
    comment = ''
    chunks = []  # pieces of the current sequence, joined once per record
    with open(fasta_path, 'r') as fasta_infile:
        for line in fasta_infile:
            if line.startswith('>'):
                # a new record begins
                if id_ is not None:
                    # a record was already being parsed: store it first
                    sequences.append(Sequence(id_, comment, ''.join(chunks)))
                header = line.split()
                id_ = header[0]
                comment = ' '.join(header[1:])
                chunks = []
            elif id_ is not None:
                # sequence line of the current record
                chunks.append(line.strip())
    if id_ is not None:
        # the last record is not followed by a header, so flush it here;
        # guarded so that a file without any header yields an empty list
        sequences.append(Sequence(id_, comment, ''.join(chunks)))
    return sequences
31

32
33
34
35

# The problem with this implementation is that all the sequences
# have to be loaded in memory before we can start working with them.
# It is better to return the sequences one by one
# and process them as they are loaded.