diff --git a/source/Data_Types.rst b/source/Data_Types.rst index e36ec6bbd1b5ed003a0a16c4726ad95b35be5215..cae429a2bc93aa7f579dd909a4b1dc82d548acfd 100644 --- a/source/Data_Types.rst +++ b/source/Data_Types.rst @@ -330,39 +330,6 @@ Exercise .. #. Using the shorter string ``s = 'gaattc'`` draw what happens in memory when you reverse ``s``. -Exercise --------- - -| The ``il2_human`` sequence contains 4 cysteins (C) in positions 9, 78, 125, 145. -| We want to generate the sequence of a mutant where the cysteins 78 and 125 are replaced by serins (S) -| Write the pseudocode, before proposing an implementation: - - -We have to take care of the difference between Python string numbering and usual position numbering: - -| C in seq -> in string -| 9 -> 8 -| 78 -> 77 -| 125 -> 124 -| 145 -> 144 - -| *generate 3 slices from the il2_human* -| *head <- from the begining and cut between the first cystein and the second* -| *body <- include the 2nd and 3rd cystein* -| *tail <- cut after the 3rd cystein until the end* -| *replace body cystein by serin* -| *make new sequence with head body_mutate tail* - - -:: - - il2_human = 'MYRMQLLSCIALSLALVTNSAPTSSSTKKTQLQLEHLLLDLQMILNGINNYKNPKLTRMLTFKFYMPKKATELKHLQCLEEELKPLEEVLNLAQSKNFHLRPRDLISNINVIVLELKGSETTFMCEYADETATIVEFLNRWITFCQSIISTLT' - head = il2_human[:77] - body = il2_human[77:125] - tail = il2_human[126:] - body_mutate = body.replace('C', 'S') - il2_mutate = head + body_mutate + tail - Exercise -------- @@ -388,7 +355,8 @@ Use the sv40 sequence to test your function. 
>>> >>> sequence = fasta_to_one_line(sv40) >>> gc_pc = gc_percent(sequence) - >>> report = "The sv40 is {0} bp length and has {1:.2%} gc".format(len(sequence), gc_pc) + >>> # report = "The sv40 is {0} bp length and has {1:.2%} gc".format(len(sequence), gc_pc) + >>> report = f"The sv40 is {len(sequence)} bp length and has {gc_pc:.2%} gc" >>> print report 'The sv40 is 5243 bp length and has 40.80% gc' diff --git a/source/Input_Output.rst b/source/Input_Output.rst index c6c79cc8983e4c8e2ce5976dd602e0a884d1a03c..8f561f7304fa08c7589323ca0b8c87f7d2c14669 100644 --- a/source/Input_Output.rst +++ b/source/Input_Output.rst @@ -89,10 +89,10 @@ Exercise We ran a blast with the following command *blastall -p blastp -d uniprot_sprot -i query_seq.fasta -e 1e-05 -m 8 -o blast2.txt* --m 8 is the tabular output. So each fields is separate to the following by a '\t' +-m 8 is the tabular output. So each field is separated from the following one by a '\\t' The fields are: query id, database sequence (subject) id, percent identity, alignment length, number of mismatches, number of gap openings, -query start, query end, subject start, subject end, Expect value, HSP bit score. +query start, query end, subject start, subject end, Expect value, HSP bit score. :download:`blast2.txt <_static/data/blast2.txt>` . 
diff --git a/source/_static/code/gc_percent.py b/source/_static/code/gc_percent.py index 0f6575d5e0e206954b582f486b0923b01b8a9298..a9abe85aad840581c25a3f1df7acdc7f99e1789c 100644 --- a/source/_static/code/gc_percent.py +++ b/source/_static/code/gc_percent.py @@ -1,11 +1,13 @@ def gc_percent(seq): """ + Compute the ratio of GC in a DNA sequence + :param seq: The nucleic sequence to compute :type seq: string :return: the percent of GC in the sequence :rtype: float """ seq = seq.upper() - gc_pc = float(seq.count('G') + seq.count('C')) / float(len(seq)) + gc_pc = (seq.count('G') + seq.count('C')) / len(seq) return gc_pc diff --git a/source/_static/code/parse_blast.py b/source/_static/code/parse_blast.py index 2a191373635af6a0defc4dc7d6e8dabe6c624d20..ca378316ec14a2952c47dd503ec245af6d4c1d22 100644 --- a/source/_static/code/parse_blast.py +++ b/source/_static/code/parse_blast.py @@ -45,5 +45,5 @@ if __name__ == '__main__': table_sorted = sorted(table_hits, key=itemgetter(2), reverse=True) # alternative # table_sorted = sorted(table, key = lambda x : x[2], reversed = True) - write_blast_output(table_hits, 'blast_sorted.txt') + write_blast_output(table_sorted, 'blast_sorted.txt')