Skip to content
Snippets Groups Projects
Select Git revision
  • 1f44238fb02a81aa62c76a6fd5467da0b1d42dad
  • master default protected
  • jkende-master-patch-56415
3 results

multiple_fasta_reader.py

Blame
  • multiple_fasta_reader.py 1.05 KiB
    from collections import namedtuple 
    
    Sequence =  namedtuple("Sequence", "id comment sequence")
    
    def fasta_reader(fasta_path):
        """
        :param fasta_path: the path to the file to parse
        :type fasta_path: string
        :return: the list of sequences read from the file
        :rtype: list of Sequence 
        """
        sequences = []
        with open(fasta_path, 'r') as fasta_infile:
            id_ = ''
            comment = ''
            sequence = ''
            for line in fasta_infile:
                if line.startswith('>'):
                    # a new sequence begin
                    if id_ != '':
                        # a sequence was already parsed so add it to the list
                        sequences.append(Sequence(id_ , comment, sequence))
                        sequence = ''
                    header = line.split()
                    id_ = header[0]
                    comment = ' '.join(header[1:])
                else:
                    sequence += line.strip()
            #append the last sequence of the file to the list    
            sequences.append(Sequence(id_ , comment, sequence))
        return sequences