# Select Git revision
# seq_io.py 965 B
#!/usr/bin/env python3
"""Example of use of SeqIO from biopython
Here, we parse a fasta file, put the records in a list, search for a motif in
the sequence of the first record, and create a subsequence around this motif.
"""
import sys

from Bio import SeqIO
def find_motif_window(sequence, motif, upstream=10, downstream=11):
    """Locate the first *motif* hit and the bounds of a window around it.

    Args:
        sequence: Object to search. It must support ``find``, ``len`` and
            slicing; a plain ``str`` works, and the script below passes the
            ``seq`` attribute of a parsed record.
        motif: Pattern to look for.
        upstream: Number of positions to keep before the motif.
        downstream: Number of positions to keep after the motif. The default
            of 11 (one more than ``upstream``) is what the original example
            used and is kept so the sample output stays the same.

    Returns:
        A ``(motif_pos, start, end)`` tuple; ``sequence[start:end]`` is the
        motif together with its flanks, truncated at the sequence ends.

    Raises:
        ValueError: If *motif* does not occur in *sequence*.
    """
    motif_pos = sequence.find(motif)
    if motif_pos == -1:
        # find() reports "not found" as -1; feeding that into the arithmetic
        # below would silently select an unrelated (usually empty) slice.
        raise ValueError(f"motif {motif!r} not found in sequence")
    # Clamp at 0: a negative start index would count from the END of the
    # sequence instead of stopping at its beginning.
    start = max(motif_pos - upstream, 0)
    # Slicing already tolerates an end past the sequence; clamping just makes
    # the reported bound match what is actually extracted.
    end = min(motif_pos + len(motif) + downstream, len(sequence))
    return motif_pos, start, end


if __name__ == "__main__":
    # The names assigned below are still module-level globals, so they remain
    # available for inspection when the script is run with ``python3 -i``.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} FASTA_FILE")
    fasta_filename = sys.argv[1]

    records = list(SeqIO.parse(fasta_filename, "fasta"))
    if not records:
        sys.exit(f"no FASTA records found in {fasta_filename}")
    print(records)
    # Sample output from the original example (the exact repr depends on the
    # input file and on the installed Biopython version):
    # [SeqRecord(seq=Seq('GCCTCGGCCTCTGCATAAATAAAAAAAATTAGTCAGCCATGGGGCGGAGAATGG...GCG', SingleLetterAlphabet()), id='gi|965480|gb|J02400.1|SV4CG', name='gi|965480|gb|J02400.1|SV4CG', description='gi|965480|gb|J02400.1|SV4CG Simian virus 40 complete genome', dbxrefs=[])]

    sequence = records[0].seq
    # repr() rather than str(): it shows the abbreviated form, as a REPL would.
    print(repr(sequence))
    # Seq('GCCTCGGCCTCTGCATAAATAAAAAAAATTAGTCAGCCATGGGGCGGAGAATGG...GCG', SingleLetterAlphabet())

    motif = "TAAAT"
    motif_pos, subseq_start, subseq_end = find_motif_window(sequence, motif)
    print(motif_pos)
    # 15

    subseq = sequence[subseq_start:subseq_end]
    print(repr(subseq))
    # Seq('GGCCTCTGCATAAATAAAAAAAATTA', SingleLetterAlphabet())