From 150ca0354054610beafb5de6cb609229b5159ace Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bertrand=20N=C3=A9ron?= <bneron@pasteur.fr>
Date: Mon, 1 Sep 2014 01:11:27 +0200
Subject: [PATCH] add exercises with solutions

---
 source/Control_Flow_Statements.rst       | 14 +++-
 source/Creating_and_Calling_Funtions.rst | 49 +++++++++++++
 source/Input_output.rst                  | 89 ++++++++++++++++++++++++
 source/_static/code/fasta_iterator.py    | 33 +++++++++
 source/_static/code/fasta_reader.py      | 21 ++++++
 source/_static/code/parse_blast.py       | 23 ++++++
 source/index.rst                         |  5 +-
 7 files changed, 232 insertions(+), 2 deletions(-)
 create mode 100644 source/_static/code/fasta_iterator.py
 create mode 100644 source/_static/code/fasta_reader.py
 create mode 100644 source/_static/code/parse_blast.py

diff --git a/source/Control_Flow_Statements.rst b/source/Control_Flow_Statements.rst
index aad5bbc..eaa6f36 100644
--- a/source/Control_Flow_Statements.rst
+++ b/source/Control_Flow_Statements.rst
@@ -38,9 +38,21 @@ The fibonacci suite can be defined as following:
       a = b
       b = new_number 
 
-We will see another way more elegant to implement the fibonacci suite in next chapter.
+We will see a more elegant way to implement the fibonacci suite in the :ref:`Advance Programming Techniques` section.
 
+Exercise
+--------
+
+Display the largest element of a list (containing floats or integers only)::
 
+   l = [1,2,3,4,58,9]
+   highest = l[0]
+   for i in l:
+      if i > highest:
+         highest = i
+   print highest
+      
+      
 Exercise
 --------
 
diff --git a/source/Creating_and_Calling_Funtions.rst b/source/Creating_and_Calling_Funtions.rst
index 49f841c..ec5bafd 100644
--- a/source/Creating_and_Calling_Funtions.rst
+++ b/source/Creating_and_Calling_Funtions.rst
@@ -3,3 +3,52 @@
 ******************************
 Creating and Calling Functions
 ******************************
+
+Exercises
+=========
+
+
+Exercise
+--------
+
+Use the code of exercise 4.5.7 on the kmers. Make a function which computes all kmers of a given length
+in a sequence.
+
+Exercise
+--------
+
+Write a function translate that takes a nucleic sequence as parameter, and returns the translated sequence.
+We give you a genetic code : ::
+  
+   code = {  'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
+              'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C',
+              'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*',
+              'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W',
+              'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R',
+              'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R',
+              'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R',
+              'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R',
+              'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S',
+              'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S',
+              'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R',
+              'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R',
+              'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G',
+              'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G',
+              'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G',
+              'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G'
+         }
+
+bonus
+"""""
+
+This function has to take the phase as a parameter.
+
+bonus
+"""""
+
+This function can take the genetic code as a parameter with a default value.
+
+         
+
+
+
diff --git a/source/Input_output.rst b/source/Input_output.rst
index aa4364e..df61015 100644
--- a/source/Input_output.rst
+++ b/source/Input_output.rst
@@ -3,3 +3,92 @@
 ************************************
 Variables, Expression and statements
 ************************************
+
+Exercises
+=========
+
+Exercise
+--------
+
+Write a function which takes the path of a file as parameter
+and displays its content on the screen.
+
+We expect the same behavior as the shell *cat* command. ::
+
+   import sys
+   import os
+   
+   def cat(path):
+      if not os.path.exists(path):
+         sys.exit("no such file: {0}".format(path))
+      with open(path, 'r') as infile:
+         for line in infile:
+            sys.stdout.write(line)
+         
+Exercise
+--------
+
+Write a function which takes the path of a file in rebase format
+and returns in a dictionary the collection of the enzymes contained in the file.
+The sequence of the binding site must be cleaned up.
+
+:download:`rebase_light.txt <_static/data/rebase_light.txt>` .
+ 
+Exercise
+--------
+
+Write a function which takes the path of a fasta file
+and returns a data structure of your choice that allows to store 
+the id of the sequence and the sequence itself.
+
+:download:`seq.fasta <_static/data/seq.fasta>` .
+
+solution 1
+""""""""""
+.. literalinclude:: _static/code/fasta_reader.py
+   :linenos:
+   :language: python
+
+:download:`fasta_reader.py <_static/code/fasta_reader.py>` .   
+
+solution 2
+""""""""""
+
+.. literalinclude:: _static/code/fasta_iterator.py
+   :linenos:
+   :language: python
+
+:download:`fasta_iterator.py <_static/code/fasta_iterator.py>` .   
+   
+   
+The second version is an iterator. Thus it returns the sequences one by one.
+The advantage of this version is that if the file contains a lot of sequences, you do not have to load the whole file in memory.
+You can call this function in a loop, or call next on it, work with the sequence, then pass to the next sequence, and so on.
+For instance: ::
+   
+   for seq in fasta_iter('my_fast_file.fasta'):
+      print seq
+    
+Exercise
+--------
+
+We ran a blast with the following command *blastall -p blastp -d uniprot_sprot -i query_seq.fasta -e 1e-05 -m 8 -o blast2.txt*
+
+-m 8 is the tabular output. So each field is separated from the following one by a ``'\t'`` (tab).
+
+The fields are: query id, database sequence (subject) id, percent identity, alignment length, number of mismatches, number of gap openings, 
+query start, query end, subject start, subject end, Expect value, HSP bit score. 
+
+:download:`blast2.txt <_static/data/blast2.txt>` .
+
+| parse the file
+| sort the hits by their *percent identity* in the descending order.
+| write the results in a new file.
+
+(adapted from *managing your biological data with python* p138)
+
+.. literalinclude:: _static/code/parse_blast.py
+   :linenos:
+   :language: python
+
+:download:`parse_blast.py <_static/code/parse_blast.py>` .   
\ No newline at end of file
diff --git a/source/_static/code/fasta_iterator.py b/source/_static/code/fasta_iterator.py
new file mode 100644
index 0000000..65b2a40
--- /dev/null
+++ b/source/_static/code/fasta_iterator.py
@@ -0,0 +1,33 @@
+from collections import namedtuple 
+from itertools import groupby
+   
+Sequence =  namedtuple("Sequence", "id comment sequence")
+
+def fasta_iter(fasta_path):
+   """
+   :param fasta_path: the path of the file containing all input sequences in fasta format.
+   :type fasta_path: string
+   :author: http://biostar.stackexchange.com/users/36/brentp
+   :return: for a given fasta file, it returns an iterator which yields
+          Sequence namedtuples (string id, string comment, string sequence)
+   :rtype: iterator
+   """
+   with open(fasta_path) as fasta_file:
+      # ditch the boolean (x[0]) and just keep the header or sequence since
+      # we know they alternate.
+      group = (x[1] for x in groupby(fasta_file , lambda line: line[0] == ">"))
+      for header in group:
+         # drop the ">"
+         header = next(header)[1:].strip()
+         header = header.split()
+         _id = header[0]
+         comment = ' '.join(header[1:])
+         seq = ''.join(s.strip() for s in next(group))
+         yield Sequence(_id, comment, seq)
+         
+#usage example:
+#f = fasta_iter('seq.fasta')
+#f.next()
+#or
+# for seq in fasta_iter('seq.fasta'):
+#   do something with seq
\ No newline at end of file
diff --git a/source/_static/code/fasta_reader.py b/source/_static/code/fasta_reader.py
new file mode 100644
index 0000000..a6595c3
--- /dev/null
+++ b/source/_static/code/fasta_reader.py
@@ -0,0 +1,21 @@
+from collections import namedtuple 
+
+Sequence =  namedtuple("Sequence", "id comment sequence")
+
+def fasta_reader(fasta_path):
+    """Read a fasta file and return its content as a Sequence (id, comment, sequence)."""
+    with open(fasta_path, 'r') as fasta_infile:
+        seq_id = ''
+        comment = ''
+        sequence = ''
+        in_sequence = False
+        for line in fasta_infile:
+            if line.startswith('>'):
+                header = line.split()
+                # drop the ">" so that the id does not carry the fasta header marker
+                seq_id = header[0][1:]
+                comment = ' '.join(header[1:])
+                in_sequence = True
+            elif in_sequence:
+                sequence += line.strip()
+        return Sequence(seq_id , comment, sequence)
\ No newline at end of file
diff --git a/source/_static/code/parse_blast.py b/source/_static/code/parse_blast.py
new file mode 100644
index 0000000..19d0c7b
--- /dev/null
+++ b/source/_static/code/parse_blast.py
@@ -0,0 +1,23 @@
+from operator import itemgetter
+
+def parse_blast_output(input_file, output_file):
+   with open(input_file, 'r') as infile:
+      table = []
+      for line in infile:
+         # remove the end of line before splitting the tab separated fields
+         col = line.rstrip('\n').split('\t')
+         try:
+            col[2] = float(col[2])
+         except ValueError as err:
+            raise RuntimeError("error in parsing {} : {}".format(input_file, err))
+         # col[2] (percent identity) is now a float: the sort below is numeric
+         table.append(col)
+   #from this point the input_file is closed
+   table_sorted = sorted(table, key = itemgetter(2), reverse = True)
+   # alternative
+   # table_sorted = sorted(table, key = lambda x : x[2], reverse = True)
+   with open(output_file, 'w') as output:
+      for row in table_sorted:
+         row = [str(x) for x in row]
+         output.write("\t".join(row) + "\n")
+         
diff --git a/source/index.rst b/source/index.rst
index 1fc790f..d585d83 100644
--- a/source/index.rst
+++ b/source/index.rst
@@ -10,10 +10,13 @@ Contents:
 
 .. toctree::
    :maxdepth: 2
-
+   :numbered:
+   
+   Introduction
    Variables
    Data_Types
    Collection_Data_Types
+   Logical_Operations
    Control_Flow_Statements
    Creating_and_Calling_Functions
    Modules_and_Packages
-- 
GitLab