#!/usr/bin/env python3
"""Example of use of SeqIO from biopython

Here, we parse a fasta file, put the records in a list, search for a motif in
the sequence of the first record, and create a subsequence around this motif.

Usage: seq_io.py FASTA_FILE
"""

import sys

MOTIF = "TAAAT"


def extract_around_motif(sequence, motif, upstream=10, downstream=11):
    """Return the part of *sequence* surrounding the first *motif* occurrence.

    Args:
        sequence: Any object supporting ``find()`` and slicing, e.g. a ``str``
            or a Biopython ``Seq``.
        motif: The sub-sequence to look for.
        upstream: Number of positions to keep before the motif.
        downstream: Number of positions to keep after the motif.

    Returns:
        The slice running from *upstream* positions before the motif to
        *downstream* positions after it, truncated at the sequence ends.

    Raises:
        ValueError: If *motif* does not occur in *sequence*.
    """
    motif_pos = sequence.find(motif)
    if motif_pos == -1:
        # find() reports "not found" as -1; slicing from there would silently
        # return an unrelated (or empty) piece of the sequence.
        raise ValueError("motif {!r} not found in sequence".format(motif))
    # Clamp at 0: a negative start would be read as an offset from the end.
    subseq_start = max(motif_pos - upstream, 0)
    subseq_end = motif_pos + len(motif) + downstream
    return sequence[subseq_start:subseq_end]


def main(argv=None):
    """Run the example on the FASTA file named on the command line.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 when the file holds no
        record or the motif is absent, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) != 1:
        print("usage: seq_io.py FASTA_FILE", file=sys.stderr)
        return 2
    fasta_filename = args[0]

    # Imported here so that extract_around_motif() stays importable on a
    # machine without Biopython.
    from Bio import SeqIO

    records = list(SeqIO.parse(fasta_filename, "fasta"))
    if not records:
        print("no FASTA record found in {}".format(fasta_filename),
              file=sys.stderr)
        return 1

    # The sample outputs below were recorded on the Simian virus 40 genome
    # (J02400.1); repr details such as the alphabet argument may differ with
    # the installed Biopython version.
    print(records)
    # [SeqRecord(seq=Seq('GCCTCGGCCTCTGCATAAATAAAAAAAATTAGTCAGCCATGGGGCGGAGAATGG...GCG', SingleLetterAlphabet()), id='gi|965480|gb|J02400.1|SV4CG', name='gi|965480|gb|J02400.1|SV4CG', description='gi|965480|gb|J02400.1|SV4CG Simian virus 40 complete genome', dbxrefs=[])]

    sequence = records[0].seq
    print(repr(sequence))
    # Seq('GCCTCGGCCTCTGCATAAATAAAAAAAATTAGTCAGCCATGGGGCGGAGAATGG...GCG', SingleLetterAlphabet())

    print(sequence.find(MOTIF))
    # 15

    try:
        subseq = extract_around_motif(sequence, MOTIF)
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1
    print(repr(subseq))
    # Seq('GGCCTCTGCATAAATAAAAAAAATTA', SingleLetterAlphabet())
    return 0


if __name__ == "__main__":
    sys.exit(main())