diff --git a/source/Object_Oriented_Programming.rst b/source/Object_Oriented_Programming.rst index a35c32c2ac97621ff9935d8dbd9874b60370b389..c063e63f891046200b379b58887320ca1a023ded 100644 --- a/source/Object_Oriented_Programming.rst +++ b/source/Object_Oriented_Programming.rst @@ -93,6 +93,85 @@ The following python code provides an example of the expected behaviour of objec :download:`point.py <_static/code/point.py>` . +Exercise +-------- + +Use Biopython to read a FASTA file (:download:`sv40.fasta <_static/data/sv40.fasta>`) +and display the following attributes: + +* id +* name +* description +* seq + +Use the SeqIO module from Biopython. +A tutorial is available at https://biopython.org/wiki/SeqIO :: + + from Bio import SeqIO + + sv40_rcd = SeqIO.read("sv40.fasta", "fasta") + print("id =", sv40_rcd.id) + print("name =", sv40_rcd.name) + print("description =", sv40_rcd.description) + print("sequence =", sv40_rcd.seq) + + +Exercise +-------- + +Translate the sequence in phases (reading frames) 1, 2 and -2 :: + + sv40_seq_phase1 = sv40_rcd.seq + sv40_seq_phase2 = sv40_rcd[1:] + sv40_seq_phase_2 = sv40_rcd[1:].reverse_complement(id=True) + +Exercise +-------- + +* Create a sequence with the first 42 nucleotides of the phase 2 sequence +* Translate this sequence +* Mutate the nucleotide at position 18: 'A' -> 'C' +* Translate the mutated sequence + +See the tutorial: http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc28 :: + + short_seq = sv40_seq_phase2[0:42] + short_seq.translate() + mutable_seq = short_seq.seq.tomutable() + mutable_seq[19] = 'C' + mutate_seq = mutable_seq.toseq() + mutate_seq.translate() + + +Exercise +-------- + +Open the file abcd.fasta (:download:`abcd.fasta <_static/data/abcd.fasta>`) and convert it to GenBank format + +**Hint**: +the alphabet attribute of each sequence must be set to extended_protein; +see the Bio.Alphabet.IUPAC module :: + + from Bio.Alphabet.IUPAC import extended_protein + with open("abcd.fasta", "r") as fasta, open('abcd.gb', 'w') as genbank: + for record in SeqIO.parse(fasta, "fasta"): + 
record.seq.alphabet = extended_protein + print(len(record.seq)) + SeqIO.write(record, genbank, 'genbank') + + +Exercise +-------- + +Open the file abcd.fasta (:download:`abcd.fasta <_static/data/abcd.fasta>`) and filter out the sequences of length <= 700. +Write the result to a FASTA file :: + + with open("abcd.fasta", "r") as input, open("abcd_short.fasta", "w") as output: + for record in SeqIO.parse(input, "fasta"): + if len(record.seq) > 700: + SeqIO.write(record, output, 'fasta') + + Exercise -------- diff --git a/source/_static/code/bio.py b/source/_static/code/bio.py new file mode 100644 index 0000000000000000000000000000000000000000..5f663e99bbd4c3e7d8ccf86da73172fdbb44176f --- /dev/null +++ b/source/_static/code/bio.py @@ -0,0 +1,42 @@ +from Bio import SeqIO + + +sv40_rcd = SeqIO.read("sv40.fasta", "fasta") +print("id =", sv40_rcd.id) +print("name =", sv40_rcd.name) +print("description =", sv40_rcd.description) +print("sequence =", sv40_rcd.seq) + +# translate the sequence in phase 1, 2, -2 + +sv40_seq_phase1 = sv40_rcd.seq +sv40_seq_phase2 = sv40_rcd[1:] +sv40_seq_phase_2 = sv40_rcd[1:].reverse_complement(id=True) + +# create a sequence with the first 42 nucleotides in phase 2 +# translate this sequence +# mutate nucleotide 18 'A' -> 'C' +# and translate the mutated sequence + + +short_seq = sv40_seq_phase2[0:42] +short_seq.translate() +mutable_seq = short_seq.seq.tomutable() +mutable_seq[19] = 'C' +mutate_seq = mutable_seq.toseq() +mutate_seq.translate() + + +from Bio.Alphabet.IUPAC import extended_protein +with open("abcd.fasta", "r") as fasta, open('abcd.gb', 'w') as genbank: + for record in SeqIO.parse(fasta, "fasta"): + record.seq.alphabet = extended_protein + SeqIO.write(record, genbank, 'genbank') + + + +with open("abcd.fasta", "r") as input, open("abcd_short.fasta", "w") as output: + for record in SeqIO.parse(input, "fasta"): + if len(record.seq) > 700: + SeqIO.write(record, output, 'fasta') + diff --git a/source/_static/data/sv40.fasta b/source/_static/data/sv40.fasta new file 
mode 100644 index 0000000000000000000000000000000000000000..37b1fb200927abc95bc9da7e8dc7dc991ff5f1a0 --- /dev/null +++ b/source/_static/data/sv40.fasta @@ -0,0 +1,76 @@ +>gi|965480|gb|J02400.1|SV4CG Simian virus 40 complete genome +GCCTCGGCCTCTGCATAAATAAAAAAAATTAGTCAGCCATGGGGCGGAGAATGGGCGGAACTGGGCGGAG +TTAGGGGCGGGATGGGCGGAGTTAGGGGCGGGACTATGGTTGCTGACTAATTGAGATGCATGCTTTGCAT +ACTTCTGCCTGCTGGGGAGCCTGGGGACTTTCCACACCTGGTTGCTGACTAATTGAGATGCATGCTTTGC +ATACTTCTGCCTGCTGGGGAGCCTGGGGACTTTCCACACCCTAACTGACACACATTCCACAGCTGGTTCT +TTCCGCCTCAGAAGGTACCTAACCAAGTTCCTCTTTCAGAGGTTATTTCAGGCCATGGTGCTGCGCCGGC +TGTCACGCCAGGCCTCCGTTAAGGTTCGTAGGTCATGGACTGAAAGTAAAAAAACAGCTCAACGCCTTTT +TGTGTTTGTTTTAGAGCTTTTGCTGCAATTTTGTGAAGGGGAAGATACTGTTGACGGGAAACGCAAAAAA +CCAGAAAGGTTAACTGAAAAACCAGAAAGTTAACTGGTAAGTTTAGTCTTTTTGTCTTTTATTTCAGGTC +CATGGGTGCTGCTTTAACACTGTTGGGGGACCTAATTGCTACTGTGTCTGAAGCTGCTGCTGCTACTGGA +TTTTCAGTAGCTGAAATTGCTGCTGGAGAGGCCGCTGCTGCAATTGAAGTGCAACTTGCATCTGTTGCTA +CTGTTGAAGGCCTAACAACCTCTGAGGCAATTGCTGCTATAGGCCTCACTCCACAGGCCTATGCTGTGAT +ATCTGGGGCTCCTGCTGCTATAGCTGGATTTGCAGCTTTACTGCAAACTGTGACTGGTGTGAGCGCTGTT +GCTCAAGTGGGGTATAGATTTTTTAGTGACTGGGATCACAAAGTTTCTACTGTTGGTTTATATCAACAAC +CAGGAATGGCTGTAGATTTGTATAGGCCAGATGATTACTATGATATTTTATTTCCTGGAGTACAAACCTT +TGTTCACAGTGTTCAGTATCTTGACCCCAGACATTGGGGTCCAACACTTTTTAATGCCATTTCTCAAGCT +TTTTGGCGTGTAATACAAAATGACATTCCTAGGCTCACCTCACAGGAGCTTGAAAGAAGAACCCAAAGAT +ATTTAAGGGACAGTTTGGCAAGGTTTTTAGAGGAAACTACTTGGACAGTAATTAATGCTCCTGTTAATTG +GTATAACTCTTTACAAGATTACTACTCTACTTTGTCTCCCATTAGGCCTACAATGGTGAGACAAGTAGCC +AACAGGGAAGGGTTGCAAATATCATTTGGGCACACCTATGATAATATTGATGAAGCAGACAGTATTCAGC +AAGTAACTGAGAGGTGGGAAGCTCAAAGCCAAAGTCCTAATGTGCAGTCAGGTGAATTTATTGAAAAATT +TGAGGCTCCTGGTGGTGCAAATCAAAGAACTGCTCCTCAGTGGATGTTGCCTTTACTTCTAGGCCTGTAC +GGAAGTGTTACTTCTGCTCTAAAAGCTTATGAAGATGGCCCCAACAAAAAGAAAAGGAAGTTGTCCAGGG +GCAGCTCCCAAAAAACCAAAGGAACCAGTGCAAGTGCCAAAGCTCGTCATAAAAGGAGGAATAGAAGTTC +TAGGAGTTAAAACTGGAGTAGACAGCTTCACTGAGGTGGAGTGCTTTTTAAATCCTCAAATGGGCAATCC 
+TGATGAACATCAAAAAGGCTTAAGTAAAAGCTTAGCAGCTGAAAAACAGTTTACAGATGACTCTCCAGAC +AAAGAACAACTGCCTTGCTACAGTGTGGCTAGAATTCCTTTGCCTAATTTAAATGAGGACTTAACCTGTG +GAAATATTTTGATGTGGGAAGCTGTTACTGTTAAAACTGAGGTTATTGGGGTAACTGCTATGTTAAACTT +GCATTCAGGGACACAAAAAACTCATGAAAATGGTGCTGGAAAACCCATTCAAGGGTCAAATTTTCATTTT +TTTGCTGTTGGTGGGGAACCTTTGGAGCTGCAGGGTGTGTTAGCAAACTACAGGACCAAATATCCTGCTC +AAACTGTAACCCCAAAAAATGCTACAGTTGACAGTCAGCAGATGAACACTGACCACAAGGCTGTTTTGGA +TAAGGATAATGCTTATCCAGTGGAGTGCTGGGTTCCTGATCCAAGTAAAAATGAAAACACTAGATATTTT +GGAACCTACACAGGTGGGGAAAATGTGCCTCCTGTTTTGCACATTACTAACACAGCAACCACAGTGCTTC +TTGATGAGCAGGGTGTTGGGCCCTTGTGCAAAGCTGACAGCTTGTATGTTTCTGCTGTTGACATTTGTGG +GCTGTTTACCAACACTTCTGGAACACAGCAGTGGAAGGGACTTCCCAGATATTTTAAAATTACCCTTAGA +AAGCGGTCTGTGAAAAACCCCTACCCAATTTCCTTTTTGTTAAGTGACCTAATTAACAGGAGGACACAGA +GGGTGGATGGGCAGCCTATGATTGGAATGTCCTCTCAAGTAGAGGAGGTTAGGGTTTATGAGGACACAGA +GGAGCTTCCTGGGGATCCAGACATGATAAGATACATTGATGAGTTTGGACAAACCACAACTAGAATGCAG +TGAAAAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATA +AACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTA +AAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGG +CCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCC +ACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCT +AAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCA +ATCTCTCTTTCCACTCCACAATTCTGCTCTGAATACTTTGAGCAAACTCAGCCACAGGTCTGTACCAAAT +TAACATAAGAAGCAAAGCAATGCCACTTTGAATTATTCTCTTTTCTAACAAAAACTCACTGCGTTCCAGG +CAATGCTTTAAATAATCTTTGGGCCTAAAATCTATTTGTTTTACAAATCTGGCCTGCAGTGTTTTAGGCA +CACTGTACTCATTCATGGTGACTATTCCAGGGGGAAATATTTGAGTTCTTTTATTTAGGTGTTTCTTTTC +TAAGTTTACCTTAACACTGCCATCCAAATAATCCCTTAAATTGTCCAGGTTATTAATTCCCTGACCTGAA +GGCAAATCTCTGGACTCCCCTCCAGTGCCCTTTACATCCTCAAAAACTACTAAAAACTGGTCAATAGCTA +CTCCTAGCTCAAAGTTCAGCCTGTCCAAGGGCAAATTAACATTTAAAGCTTTCCCCCCACATAATTCAAG +CAAAGCAGCTGCTAATGTAGTTTTACCACTATCAATTGGTCCTTTAAACAGCCAGTATCTTTTTTTAGGA 
+ATGTTGTACACCATGCATTTTAAAAAGTCATACACCACTGAATCCATTTTGGGCAACAAACAGTGTAGCC +AAGCAACTCCAGCCATCCATTCTTCTATGTCAGCAGAGCCTGTAGAACCAAACATTATATCCATCCTATC +CAAAAGATCATTAAATCTGTTTGTTAACATTTGTTCTCTAGTTAATTGTAGGCTATCAACCCGCTTTTTA +GCTAAAACAGTATCAACAGCCTGTTGGCATATGGTTTTTTGGTTTTTGCTGTCAGCAAATATAGCAGCAT +TTGCATAATGCTTTTCATGGTACTTATAGTGGCTGGGCTGTTCTTTTTTAATACATTTTAAACACATTTC +AAAACTGTACTGAAATTCCAAGTACATCCCAAGCAATAACAACACATCATCACATTTTGTTTCCATTGCA +TACTCTGTTACAAGCTTCCAGGACACTTGTTTAGTTTCCTCTGCTTCTTCTGGATTAAAATCATGCTCCT +TTAACCCACCTGGCAAACTTTCCTCAATAACAGAAAATGGATCTCTAGTCAAGGCACTATACATCAAATA +TTCCTTATTAACCCCTTTACAAATTAAAAAGCTAAAGGTACACAATTTTTGAGCATAGTTATTAATAGCA +GACACTCTATGCCTGTGTGGAGTAAGAAAAAACAGTATGTTATGATTATAACTGTTATGCCTACTTATAA +AGGTTACAGAATATTTTTCCATAATTTTCTTGTATAGCAGTGCAGCTTTTTCCTTTGTGGTGTAAATAGC +AAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCC +TTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGCCTCATCAT +CACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTCCACCACTGCTCCCATTC +ATCAGTTCCATAGGTTGGAATCTAAAATACACAAACAATTAGAATCAGTAGTTTAACACATTATACACTT +AAAAATTTTATATTTACCTTAGAGCTTTAAATCTCTGTAGGTAGTTTGTCCAATTATGTCACACCACAGA +AGTAAGGTTCCTTCACAAAGATCAAGTCCAAACCACATTCTAAAGCAATCGAAGCAGTAGCAATCAACCC +ACACAAGTGGATCTTTCCTGTATAATTTTCTATTTTCATGCTTCATCCTCAGTAAGCACAGCAAGCATAT +GCAGTTAGCAGACATTTTCTTTGCACACTCAGGCCATTGTTTGCAGTACATTGCATCAACACCAGGATTT +AAGGAAGAAGCAAATACCTCAGTTGCATCCCAGAAGCCTCCAAAGTCAGGTTGATGAGCATATTTTACTC +CATCTTCCATTTTCTTGTACAGAGTATTCATTTTCTTCATTTTTTCTTCATCTCCTCCTTTATCAGGATG +AAACTCCTTGCATTTTTTTAAATATGCCTTTCTCATCAGAGGAATATTCCCCCAGGCACTCCTTTCAAGA +CCTAGAAGGTCCATTAGCTGCAAAGATTCCTCTCTGTTTAAAACTTTATCCATCTTTGCAAAGCTTTTTG +CAAAAGCCTAGGCCTCCAAAAAAGCCTCCTCACTACTTCTGGAATAGCTCAGAGGCCGAGGCG \ No newline at end of file