diff --git a/source/Creating_and_Calling_Functions.rst b/source/Creating_and_Calling_Functions.rst
index ec5bafd36ea704f2cb26c273e35164ef93734fee..4552e79844d9582d988f88a4d8518f7c843a70ab 100644
--- a/source/Creating_and_Calling_Functions.rst
+++ b/source/Creating_and_Calling_Functions.rst
@@ -14,10 +14,16 @@ Exercice
 
 Use the code of the exetrcise 4.5.7 on the kmer. Make a function which compute all kmer of a given lenght in a sequence.
 
+.. literalinclude:: _static/code/kmer.py
+   :linenos:
+   :language: python
+
+:download:`kmer.py <_static/code/kmer.py>` .
+
 Exercise
 --------
 
-Write a function translate taht have a nucleic sequence as parameter, and return the translate sequence.
+Write a function translate that takes a nucleic sequence as parameter, and returns the translated sequence.
 We give you a genetic code : ::
 
     code = { 'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
@@ -43,12 +49,50 @@ bonus
 
 This function have to take the phase as parameter
 
-bonus
-"""""
+.. literalinclude:: _static/code/translate.py
+   :linenos:
+   :language: python
+
+:download:`translate.py <_static/code/translate.py>` .
+
+Exercise
+--------
 
-This function can take genetique code as default parameter
+Implement a matrix and functions to handle it.
+Choose the data structure of your choice.
+The API (**A**\ pplication **P**\ rogramming **I**\ nterface) to implement is the following:
 
+.. literalinclude:: _static/code/matrix.py
+   :linenos:
+   :language: python
+
+:download:`matrix.py <_static/code/matrix.py>` .
+
+Exercise
+--------
+
+Write a program that calculates the similarity of 2 RNA sequences.
+
+* To compute the similarity you need to parse a file containing the similarity matrix.
+* The similarity of the 2 sequences is the sum of base similarities.
+  So you have to compare the first base of the two sequences and use the matrix to get the similarity
+  from the similarity table, and so on for all bases, then sum these similarities.
+
+.. note::
 
+   As we have not yet seen how to read a file, we provide a list of strings that represents the file
+   as we would get them if we read that file.
+
+::
 
+   lines = iter([' A G C U\n',
+                 'A 1.0 0.5 0.0 0.0\n',
+                 'G 0.5 1.0 0.0 0.0\n',
+                 'C 0.0 0.0 1.0 0.5\n',
+                 'U 0.0 0.0 0.5 1.0\n'])
 
+.. literalinclude:: _static/code/similarity.py
+   :linenos:
+   :language: python
 
+:download:`similarity.py <_static/code/similarity.py>` .
diff --git a/source/_static/code/kmer.py b/source/_static/code/kmer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1e11b62916e3eba8e5ba1d36fd1cd974274be28
--- /dev/null
+++ b/source/_static/code/kmer.py
@@ -0,0 +1,20 @@
+import collections
+
+
+def kmer(sequence, k):
+    """
+    count all the words of length k (kmers) found in a sequence
+
+    :param sequence: the sequence to analyse
+    :type sequence: string
+    :param k: the length of the kmers
+    :type k: int
+    :return: the number of occurrences of each kmer
+    :rtype: collections.defaultdict (kmer -> int)
+    """
+    kmers = collections.defaultdict(int)
+    # + 1 so that the kmer ending on the last base is counted too
+    for i in range(len(sequence) - k + 1):
+        word = sequence[i:i + k]
+        kmers[word] += 1
+    return kmers
diff --git a/source/_static/code/matrix.py b/source/_static/code/matrix.py
new file mode 100755
index 0000000000000000000000000000000000000000..0cd1ab421373bcc7b74f8064a42b030f259690a4
--- /dev/null
+++ b/source/_static/code/matrix.py
@@ -0,0 +1,237 @@
+"""
+Implementation of simple matrix
+
+The matrix is stored as a list of columns: matrix[col_no][row_no]
+"""
+
+
+def matrix_maker(row_num, col_num, val=None):
+    """
+    :param row_num: the number of rows
+    :type row_num: int
+    :param col_num: the number of columns
+    :type col_num: int
+    :param val: the default value to fill the matrix
+    :type val: any (None by default)
+    :return: matrix of row_num x col_num
+    :rtype: matrix
+    """
+    # one distinct list per column
+    return [[val] * row_num for _ in range(col_num)]
+
+
+def _check_matrix_index(matrix, row_no, col_no):
+    """
+    check if row_no and col_no are in matrix bound
+
+    :param matrix: the matrix to check the indexes against
+    :type matrix: matrix
+    :param row_no: the index of row to check
+    :type row_no: int
+    :param col_no: the index of column to check
+    :type col_no: int
+    :raise: IndexError if row_no or col_no are out of matrix bounds
+    """
+    row_max, col_max = matrix_size(matrix)
+    if not (0 <= row_no < row_max and 0 <= col_no < col_max):
+        raise IndexError("matrix index out of range")
+
+
+def matrix_size(matrix):
+    """
+    :param matrix: the matrix to compute the size
+    :type matrix: matrix
+    :return: the size of matrix (number of rows, number of cols)
+    :rtype: tuple of 2 int
+    """
+    if not matrix:
+        # a matrix without any column has no row either
+        return 0, 0
+    return len(matrix[0]), len(matrix)
+
+
+def matrix_get_cell(matrix, row_no, col_no):
+    """
+    :param matrix: the matrix
+    :type matrix: matrix
+    :param row_no: the row number
+    :type row_no: int
+    :param col_no: the column number
+    :type col_no: int
+    :return: the content of cell corresponding to row_no x col_no
+    :rtype: any
+    :raise: IndexError if row_no or col_no are out of matrix bounds
+    """
+    _check_matrix_index(matrix, row_no, col_no)
+    return matrix[col_no][row_no]
+
+
+def matrix_set_cell(matrix, row_no, col_no, val):
+    """
+    set the value val in cell specified by row_no x col_no
+
+    :param matrix: the matrix to modify
+    :type matrix: matrix
+    :param row_no: the row number of cell to set
+    :type row_no: int
+    :param col_no: the column number of cell to set
+    :type col_no: int
+    :param val: the value to set in cell
+    :type val: any
+    :raise: IndexError if row_no or col_no are out of matrix bounds
+    """
+    _check_matrix_index(matrix, row_no, col_no)
+    matrix[col_no][row_no] = val
+
+
+def matrix_to_str(matrix):
+    """
+    :param matrix: the matrix to represent
+    :type matrix: matrix
+    :return: a string representation of the matrix
+             (one line per row, cells separated by a space)
+    :rtype: str
+    """
+    # by design all matrix cols have same size
+    # so zip(*matrix) transposes the columns into rows
+    rows = []
+    for row in zip(*matrix):
+        rows.append(" ".join([str(cell) for cell in row]) + "\n")
+    return "".join(rows)
+
+
+def matrix_mult(matrix, val):
+    """
+    :param matrix: the matrix to multiply
+    :type matrix: matrix
+    :param val: the value to mult the matrix with
+    :type val: int
+    :return: a new matrix corresponding the scalar product of matrix * val
+    :rtype: matrix
+    """
+    return [[cell * val for cell in col] for col in matrix]
+
+
+def matrix_get_row(matrix, row_no):
+    """
+    :param matrix: the matrix to get the row from
+    :type matrix: matrix
+    :param row_no: row number
+    :type row_no: int
+    :return: the row of matrix corresponding to row_no
+             a shallow copy of the row
+    :rtype: list
+    :raise: IndexError if row_no is out of matrix bounds
+    """
+    _check_matrix_index(matrix, row_no, 0)
+    _, col_max = matrix_size(matrix)
+    return [matrix_get_cell(matrix, row_no, col_n) for col_n in range(col_max)]
+
+
+def matrix_set_row(matrix, row_no, val):
+    """
+    set all cells of row row_no with val
+
+    :param matrix: the matrix to modify
+    :type matrix: matrix
+    :param row_no: the row number
+    :type row_no: int
+    :param val: the value to put in cells
+    :type val: any
+    :raise: IndexError if row_no is out of matrix bounds
+    """
+    _check_matrix_index(matrix, row_no, 0)
+    _, col_max = matrix_size(matrix)
+    for col_n in range(col_max):
+        matrix_set_cell(matrix, row_no, col_n, val)
+
+
+def matrix_get_col(matrix, col_no):
+    """
+    :param matrix: the matrix to get the column from
+    :type matrix: matrix
+    :param col_no: the column number
+    :type col_no: int
+    :return: the column corresponding to col_no of matrix
+             a shallow copy of the col
+    :rtype: list
+    :raise: IndexError if col_no is out of matrix bounds
+    """
+    _check_matrix_index(matrix, 0, col_no)
+    return matrix[col_no][:]
+
+
+def matrix_set_col(matrix, col_no, val):
+    """
+    set all cells of col col_no with val
+
+    :param matrix: the matrix to modify
+    :type matrix: matrix
+    :param col_no: the column number
+    :type col_no: int
+    :param val: the value to put in cells
+    :type val: any
+    :raise: IndexError if col_no is out of matrix bounds
+    """
+    _check_matrix_index(matrix, 0, col_no)
+    row_max, _ = matrix_size(matrix)
+    for row_n in range(row_max):
+        matrix_set_cell(matrix, row_n, col_no, val)
+
+
+def matrix_replace_col(matrix, col_no, col):
+    """
+    replace column col_no with col
+
+    :param matrix: the matrix to modify
+    :type matrix: matrix
+    :param col_no: the column number to replace
+    :type col_no: int
+    :param col: the list of values to use as replacement of column
+    :type col: list
+    :raise: RuntimeError if the size of col is not the number of rows
+    :raise: IndexError if col_no is out of matrix bounds
+    """
+    row_max, col_max = matrix_size(matrix)
+    # a column holds one cell per row
+    if len(col) != row_max:
+        msg = "the size of col {0} does not fit to matrix size {1}x{2}"
+        raise RuntimeError(msg.format(len(col), row_max, col_max))
+    _check_matrix_index(matrix, 0, col_no)
+    # store a copy so that the matrix does not share its cells with col
+    matrix[col_no] = list(col)
+
+
+def matrix_replace_row(matrix, row_no, row):
+    """
+    replace row row_no with row
+
+    :param matrix: the matrix to modify
+    :type matrix: matrix
+    :param row_no: the row number to replace
+    :type row_no: int
+    :param row: the list of value to use as replacement of row
+    :type row: list
+    :raise: RuntimeError if the size of row is not the number of columns
+    :raise: IndexError if row_no is out of matrix bounds
+    """
+    row_max, col_max = matrix_size(matrix)
+    # a row holds one cell per column
+    if len(row) != col_max:
+        msg = "the size of row {0} does not fit to matrix size {1}x{2}"
+        raise RuntimeError(msg.format(len(row), row_max, col_max))
+    _check_matrix_index(matrix, row_no, 0)
+    for col_no, value in enumerate(row):
+        matrix_set_cell(matrix, row_no, col_no, value)
+
+
+if __name__ == '__main__':
+    m = matrix_maker(5, 3)
+    print(m)
+    matrix_set_cell(m, 0, 0, 1)
+    matrix_set_cell(m, 0, 2, 2)
+    matrix_set_cell(m, 4, 0, 12)
+    matrix_set_cell(m, 4, 2, 15)
+    print(matrix_to_str(m))
+    print("get row 0 {0}".format(matrix_get_row(m, 0)))
+    print("get col 0 {0}".format(matrix_get_col(m, 0)))
diff --git a/source/_static/code/similarity.py b/source/_static/code/similarity.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab45dc991287319e0ee951661e2d0572230b509a
--- /dev/null
+++ b/source/_static/code/similarity.py
@@ -0,0 +1,75 @@
+from matrix import *
+
+lines = iter([' A G C U\n',
+              'A 1.0 0.5 0.0 0.0\n',
+              'G 0.5 1.0 0.0 0.0\n',
+              'C 0.0 0.0 1.0 0.5\n',
+              'U 0.0 0.0 0.5 1.0\n'])
+
+
+def parse_similarity_file():
+    """
+    parse file containing RNA similarity matrix and return a matrix
+
+    The module level iterator ``lines`` stands for the file, so it is
+    consumed by the first call.
+
+    :return: the similarity matrix
+    :rtype: matrix
+    """
+    sim_matrix = matrix_maker(4, 4)
+    # skip first line (the header with the bases names)
+    next(lines)
+    for row_no, line in enumerate(lines):
+        fields = line.split()
+        # the first field is the base name, the others are the similarities
+        values = [float(val) for val in fields[1:]]
+        matrix_replace_row(sim_matrix, row_no, values)
+    return sim_matrix
+
+
+def get_similarity(b1, b2, sim_matrix):
+    """
+    :param b1: the first base must be in ('A', 'G', 'C', 'U')
+    :type b1: string
+    :param b2: the second base must be in ('A', 'G', 'C', 'U')
+    :type b2: string
+    :param sim_matrix: a similarity matrix
+    :type sim_matrix: matrix
+    :return: the similarity between b1 and b2
+    :rtype: float
+    :raise: KeyError if b1 or b2 is not a known base
+    """
+    bases = {'A': 0, 'G': 1, 'C': 2, 'U': 3}
+    b1 = b1.upper()
+    b2 = b2.upper()
+    if b1 not in bases:
+        raise KeyError("unknown base b1: " + str(b1))
+    if b2 not in bases:
+        raise KeyError("unknown base b2: " + str(b2))
+    return matrix_get_cell(sim_matrix, bases[b1], bases[b2])
+
+
+def compute_similarity(seq1, seq2, sim_matrix):
+    """
+    compute a similarity score between 2 RNA sequences of same length
+
+    :param seq1: first sequence to compare
+    :type seq1: string
+    :param seq2: second sequence to compare
+    :type seq2: string
+    :param sim_matrix: a similarity matrix
+    :type sim_matrix: matrix
+    :return: the similarity score between seq1 and seq2
+    :rtype: float
+    """
+    return sum([get_similarity(b1, b2, sim_matrix) for b1, b2 in zip(seq1, seq2)])
+
+
+if __name__ == '__main__':
+    seq1 = 'AGCAUCUA'
+    seq2 = 'ACCGUUCU'
+    sim_matrix = parse_similarity_file()
+    print(matrix_to_str(sim_matrix))
+    similarity = compute_similarity(seq1, seq2, sim_matrix)
+    print(similarity)
diff --git a/source/_static/code/translate.py b/source/_static/code/translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4238900f7e57b176023f9d9f52dbf66f2e366e4
--- /dev/null
+++ b/source/_static/code/translate.py
@@ -0,0 +1,69 @@
+genetic_code = {'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
+                'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C',
+                'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*',
+                'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W',
+                'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R',
+                'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R',
+                'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R',
+                'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R',
+                'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S',
+                'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S',
+                'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R',
+                'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R',
+                'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G',
+                'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G',
+                'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G',
+                'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G'
+                }
+
+
+def translate(nuc_seq, code):
+    """
+    translate a nucleic sequence into a protein sequence
+
+    :param nuc_seq: the nucleic sequence to translate
+    :type nuc_seq: string
+    :param code: the genetic code (codon in lower case -> amino acid)
+    :type code: dict
+    :return: the translated sequence; the trailing bases which
+             do not make a complete codon are ignored
+    :rtype: string
+    :raise: RuntimeError if a codon is not in code
+    """
+    prot_seq = ''
+    # start + 2 < len(nuc_seq): only complete codons are read
+    for start in range(0, len(nuc_seq) - 2, 3):
+        codon = nuc_seq[start:start + 3].lower()
+        if codon not in code:
+            raise RuntimeError("unknown codon: " + codon)
+        prot_seq += code[codon]
+    return prot_seq
+
+
+def translate2(nuc_seq, code, phase=1):
+    """
+    translate a nucleic sequence into a protein sequence in a given phase
+
+    :param nuc_seq: the nucleic sequence to translate
+    :type nuc_seq: string
+    :param code: the genetic code (codon in lower case -> amino acid)
+    :type code: dict
+    :param phase: the phase of translation: 1, 2 or 3
+                  or -1, -2, -3 to read the reversed sequence
+    :type phase: int
+    :return: the translated sequence
+    :rtype: string
+    :raise: ValueError if phase is not in 1, 2, 3, -1, -2, -3
+    :raise: RuntimeError if a codon is not in code
+    """
+    if 0 < phase < 4:
+        start = phase - 1
+    elif -4 < phase < 0:
+        start = -phase - 1
+        # NOTE(review): the sequence is reversed but not complemented,
+        # confirm this is the expected behavior for negative phases
+        nuc_seq = nuc_seq[::-1]
+    else:
+        raise ValueError("phase must be 1, 2, 3, -1, -2 or -3, got: " + str(phase))
+    # the reading starts at the first base of the phase
+    return translate(nuc_seq[start:], code)