Commit 41353ab3 authored by Nicolas MAILLET

remove TP3 skeleton

parent 10f1ba0e
class SuffixArray:
    """ Suffix array of a string, built by prefix doubling.

    Adapted from https://pypi.org/project/pysuffixarray/

    Attributes:
        string: the input string with the '$' terminator appended.
        array: start positions of the suffixes of `string`, listed in
            lexicographic (code point) order of the suffixes.
    """

    def __init__(self, string):
        """ Append the '$' terminator to `string` and build the suffix array. """
        self.string = string + '$'
        self.array = self._construct_suffix_array(self.string)

    def _construct_suffix_array(self, string):
        """ Constructs suffix array in O(nlogn) time by sorting ranking
        pairs of suffixes.

        Ranks are dense integers starting at 1; rank 0 is reserved for
        "past the end of the string", so a shorter suffix always sorts
        before a longer one sharing the same prefix. This keeps the result
        correct for any character (non-ASCII included) and even when the
        input already contains '$'.
        """
        string_len = len(string)
        suffix_array = list(range(string_len))
        # Rank single characters by code point order, starting at 1.
        char_rank = {
            char: rank for rank, char in enumerate(sorted(set(string)), start=1)
        }
        rank_array = [char_rank[char] for char in string]
        k = 1
        # This sorting process will be repeated at most log(n) times.
        while k < string_len:
            # Stable LSD radix sort: first sort on the second element of
            # the ranking pairs (rank at offset k)...
            suffix_array = self._sort(suffix_array, rank_array, string_len, k)
            # ...then on the first element (rank at offset 0).
            suffix_array = self._sort(suffix_array, rank_array, string_len, 0)
            # Recompute rank of suffixes.
            rank_array = self._rerank(suffix_array, rank_array, k)
            # Every suffix has its own rank: the order is final.
            if rank_array[suffix_array[-1]] == string_len:
                break
            k *= 2
        return suffix_array

    def _sort(self, suffix_array, rank_array, string_len, k):
        """ Sorts suffixes by count-sorting rank array.
        Offset k is defined such that the value used when sorting
        suffix i corresponds to rank_array[i + k]. Suffixes for which
        i + k is past the end of the string use the key 0.
        The sort is stable: suffixes with equal keys keep their order.
        """
        # Size the buckets from the ranks actually present, so that no
        # key can fall outside the count array.
        bucket_count = max(rank_array, default=0) + 1
        keys = [
            rank_array[suffix + k] if suffix + k < string_len else 0
            for suffix in suffix_array
        ]
        count = [0] * bucket_count
        for key in keys:
            count[key] += 1
        # Turn the counts into the first output slot of each bucket.
        next_slot = 0
        for bucket in range(bucket_count):
            bucket_size = count[bucket]
            count[bucket] = next_slot
            next_slot += bucket_size
        sorted_suffixes = [-1] * string_len
        for suffix, key in zip(suffix_array, keys):
            sorted_suffixes[count[key]] = suffix
            count[key] += 1
        return sorted_suffixes

    def _rerank(self, suffix_array, rank_array, k):
        """ Recomputes rank of suffixes. When consecutive suffixes with
        identical ranking pairs are found,
        assigns same ranks to them.

        New ranks start at 1 (0 stays reserved for "past the end").
        """
        string_len = len(rank_array)
        new_rank_array = [0] * string_len
        if not suffix_array:
            return new_rank_array

        def ranking_pair(suffix):
            # Second element is 0 when suffix + k is past the end.
            if suffix + k < string_len:
                return rank_array[suffix], rank_array[suffix + k]
            return rank_array[suffix], 0

        rank = 1
        previous_pair = ranking_pair(suffix_array[0])
        new_rank_array[suffix_array[0]] = rank
        for suffix in suffix_array[1:]:
            current_pair = ranking_pair(suffix)
            # When ranking pairs are identical, do not increment the rank.
            if current_pair != previous_pair:
                rank += 1
            new_rank_array[suffix] = rank
            previous_pair = current_pair
        return new_rank_array

    # self representation for print
    def __repr__(self):
        """ Return the string, its suffix array and one line per suffix. """
        parts = [
            "String {}\nSuffix array: {}\n".format(self.string, self.array),
            "\nEasier representation:\ni\tSA\tSuffix\n",
        ]
        parts.extend(
            "{}\t{}:\t{}\n".format(pos, val, self.string[val:])
            for pos, val in enumerate(self.array)
        )
        return "".join(parts)
""" TP3 where we code a global alignment program
based on Needleman-Wunsch algorithm """
def simple_display(seq_top, seq_left, score):
    """ Print both sequences, each one reversed, followed by the score """
    # The sequences are printed back to front
    top_reversed = seq_top[::-1]
    left_reversed = seq_left[::-1]
    template = "Seq_top: {}\nSeq_left: {}\nScore: {}"
    print(template.format(top_reversed, left_reversed, score))
def nice_display(seq_top, seq_left, score):
    """ Print the two sequences (reversed) on top of each other, with a
    pipe between the positions where they hold the same letter, then
    the score """
    # Offsets from the end of the sequences: 1 is the last letter
    offsets = range(1, len(seq_top) + 1)
    # First line: seq_top in reverse order
    top_line = "".join(seq_top[-offset] for offset in offsets)
    # Middle line: a pipe on a match, a space otherwise
    middle_line = "".join(
        "|" if seq_top[-offset] == seq_left[-offset] else " "
        for offset in offsets
    )
    # Last line: seq_left in reverse order, over the same positions
    left_line = "".join(seq_left[-offset] for offset in offsets)
    # Assemble the three lines and the score, then print everything
    score_line = "\nScore: {}\n".format(score)
    print(top_line + "\n" + middle_line + "\n" + left_line + score_line)
class _Cell:
    """ One cell of the dynamic programming matrix """
    __slots__ = ("score", "prev_pos")

    def __init__(self):
        # Best alignment score reaching this cell (None until computed)
        self.score = None
        # (line, column) of the cell this score comes from
        # (None for the first cell, or until computed)
        self.prev_pos = None


class DynamicMatrix:
    """ Dynamic programming matrix for a global alignment
    (Needleman-Wunsch algorithm).

    Columns follow seq_top and lines follow seq_left; the matrix has one
    extra first line and first column standing for "nothing aligned yet".
    """

    def __init__(self, seq_top, seq_left, match, mismatch, indel):
        """ Store the sequences and the scoring scheme, and create the
        empty matrix.

        seq_top: sequence written along the top of the matrix.
        seq_left: sequence written along the left of the matrix.
        match: score added when two aligned letters are equal.
        mismatch: score added when two aligned letters differ.
        indel: score added when a letter is aligned with a gap.
        """
        # Init all "self" variables
        self.seq_top = seq_top
        self.seq_left = seq_left
        self.match = match
        self.mismatch = mismatch
        self.indel = indel
        # Create the matrix of cells: one independent cell per position
        self.matrix = [
            [_Cell() for _ in range(len(seq_top) + 1)]
            for _ in range(len(seq_left) + 1)
        ]

    # self representation for print
    def __repr__(self):
        """ Return the scores followed by the previous positions """
        return "Scores:\n{}\nPrev_pos:\n{}\n\n".format(self.print_scores(), self.print_prev_pos())

    # self representation for print
    def print_scores(self):
        """ Return the scores of the matrix as a printable string """
        # What will be returned, starting with the header line (top_seq)
        parts = [". . "]
        parts.extend(" {} ".format(letter) for letter in self.seq_top)
        parts.append("\n")
        # For each line
        for ind, line in enumerate(self.matrix):
            # Print seq_left (the first line has no letter)
            if ind > 0:
                parts.append("{} ".format(self.seq_left[ind-1]))
            else:
                parts.append(". ")
            # For each column
            for cell in line:
                if cell.score is None:
                    # This cell has no value: add a dot
                    parts.append(" . ")
                else:
                    tmp_val = str(cell.score)
                    if len(tmp_val) == 1:
                        # Single characters are centered
                        parts.append(" " + tmp_val + " ")
                    else:
                        # Pad to 3 characters; wider values are kept
                        # whole instead of being dropped
                        parts.append(tmp_val.ljust(3))
                # Always add a space after the value we add
                parts.append(" ")
            # End of this line, go to next line
            parts.append("\n")
        # Return the content of the Matrix
        return "".join(parts)

    # self representation for print
    def print_prev_pos(self):
        """ Return the previous positions of the matrix as a printable
        string """
        # What will be returned, starting with the header line (top_seq)
        parts = [". . "]
        parts.extend(" {} ".format(letter) for letter in self.seq_top)
        parts.append("\n")
        # For each line
        for ind, line in enumerate(self.matrix):
            # Print seq_left (the first line has no letter)
            if ind > 0:
                parts.append("{} ".format(self.seq_left[ind-1]))
            else:
                parts.append(". ")
            # For each column
            for cell in line:
                if cell.prev_pos is None:
                    # This cell has no value: add a dot
                    parts.append(". ")
                else:
                    # Add its content
                    parts.append(str(cell.prev_pos))
                # Always add a space after the value we add
                parts.append(" ")
            # End of this line, go to next line
            parts.append("\n")
        # Return the content of the Matrix
        return "".join(parts)

    def init(self):
        """ Initialize the matrix, i.e. fill the first line and column """
        # First cell is 0 and has no previous position
        origin = self.matrix[0][0]
        origin.score = 0
        origin.prev_pos = None
        # First line: only gaps in seq_left, each cell comes from its left
        for col in range(1, len(self.seq_top) + 1):
            cell = self.matrix[0][col]
            cell.score = col * self.indel
            cell.prev_pos = (0, col - 1)
        # First column: only gaps in seq_top, each cell comes from above
        for row in range(1, len(self.seq_left) + 1):
            cell = self.matrix[row][0]
            cell.score = row * self.indel
            cell.prev_pos = (row - 1, 0)

    def fill_matrix(self):
        """ Fill-up the matrix

        Each cell takes the best of: diagonal + match/mismatch,
        above + indel, left + indel. On ties the diagonal is preferred,
        then the cell above, then the cell on the left.
        """
        # Make sure the first line and column are ready
        if self.matrix[0][0].score is None:
            self.init()
        for row in range(1, len(self.seq_left) + 1):
            for col in range(1, len(self.seq_top) + 1):
                if self.seq_left[row - 1] == self.seq_top[col - 1]:
                    substitution = self.match
                else:
                    substitution = self.mismatch
                candidates = [
                    (self.matrix[row - 1][col - 1].score + substitution,
                     (row - 1, col - 1)),
                    (self.matrix[row - 1][col].score + self.indel,
                     (row - 1, col)),
                    (self.matrix[row][col - 1].score + self.indel,
                     (row, col - 1)),
                ]
                # max() keeps the first of equal candidates
                best_score, best_prev = max(candidates, key=lambda cand: cand[0])
                cell = self.matrix[row][col]
                cell.score = best_score
                cell.prev_pos = best_prev

    def global_alignment(self):
        """ Make a global alignment of two sequences

        Returns (al_seq_top, al_seq_left, score). Both aligned sequences
        are strings written from the END of the alignment to its start
        (the order of the traceback) with '-' for gaps, which is what
        simple_display and nice_display expect.
        """
        last_cell = self.matrix[-1][-1]
        # Fill the matrix if it has not been done yet
        if last_cell.score is None:
            self.fill_matrix()
        al_seq_top = []
        al_seq_left = []
        # Trace back from the last cell to the first one
        row, col = len(self.seq_left), len(self.seq_top)
        while (row, col) != (0, 0):
            prev_row, prev_col = self.matrix[row][col].prev_pos
            # Same column as before: no letter of seq_top used, so a gap
            al_seq_top.append(self.seq_top[col - 1] if prev_col != col else "-")
            # Same line as before: no letter of seq_left used, so a gap
            al_seq_left.append(self.seq_left[row - 1] if prev_row != row else "-")
            row, col = prev_row, prev_col
        return "".join(al_seq_top), "".join(al_seq_left), last_cell.score
def main():
    """ Entry point of TP3: build the matrix for the two example sequences """
    seq_top = "ACGGCTAT"
    seq_left = "ACTGTAG"
    mat = DynamicMatrix(seq_top, seq_left, match=2, mismatch=-1, indel=-2)
    # Next steps, to enable once DynamicMatrix is completed:
    #mat.init()
    #mat.fill_matrix()
    #print(mat)
    #al_seq_top, al_seq_left, score = mat.global_alignment()
    #nice_display(al_seq_top, al_seq_left, score)
# Launch the main only when this file is run as a script, so that
# importing it does not run the program nor terminate the importer
if __name__ == "__main__":
    main()
    # Exit without error (same SystemExit as exit(0), without relying on
    # the interactive helper installed by the site module)
    raise SystemExit(0)
# Always put one extra return line
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment