Commit bb91a7b9 authored by Nicolas  MAILLET's avatar Nicolas MAILLET

Add TP5 solution

parent f8376b0a
""" TP5 where we play with suffix array """
# Import the Suffix Array
# (adapted from https://pypi.org/project/pysuffixarray/)
import suffix_array as sa
def first_occurrence(suffix_array, pattern):
    """ Exact string matching
    Return the position in the string of the first suffix (in suffix array
    order) that starts with the pattern, using a binary search.

    Args:
        suffix_array: object exposing ``string`` (the indexed text) and
            ``array`` (the starting positions of the suffixes of ``string``;
            the binary search assumes they are sorted lexicographically
            by suffix).
        pattern: the string to look for.

    Returns:
        The value stored at the lowest suffix array rank whose suffix starts
        with ``pattern``. This is the first occurrence in suffix array order,
        which is not necessarily the leftmost occurrence in the text.
        ``None`` is returned when the pattern does not occur at all
        (including when the suffix array is empty).
    """
    text = suffix_array.string
    ranks = suffix_array.array
    width = len(pattern)

    # Half-open search window [low, high): ``high`` starts one past the last
    # rank, so a pattern greater than every suffix ends with low == len(ranks)
    # instead of silently landing on the last entry.
    low, high = 0, len(ranks)
    while low < high:
        # Get the middle rank (euclidean division)
        mid = (low + high) // 2
        # Compare only the first len(pattern) characters of the suffix
        if text[ranks[mid]:ranks[mid] + width] < pattern:
            # mid is too small and has already been tried: skip it
            low = mid + 1
        else:
            # mid may be the answer: keep it inside the window
            high = mid

    # ``low`` is only the insertion point of the pattern; check that the
    # suffix found there really starts with the pattern before reporting it.
    if low < len(ranks) and text[ranks[low]:ranks[low] + width] == pattern:
        return ranks[low]
    return None
def match(suffix_array, pattern):
    """ Exact string matching
    Return the positions in the string
    where the pattern occurs, using two binary searches.

    Args:
        suffix_array: object exposing ``string`` (the indexed text) and
            ``array`` (the starting positions of the suffixes of ``string``;
            the binary searches assume they are sorted lexicographically
            by suffix).
        pattern: the string to look for.

    Returns:
        A list with the starting position of every occurrence of ``pattern``,
        in suffix array order (not sorted by position). The list is empty
        when the pattern does not occur.
    """
    text = suffix_array.string
    ranks = suffix_array.array
    width = len(pattern)
    size = len(ranks)

    # Find the FIRST rank whose suffix starts with the pattern
    # (starting rank of the final range).
    # The window is half-open, [low, high), with ``high`` one past the last
    # rank: a pattern greater than every suffix then yields low == size and
    # therefore an empty result.
    low, high = 0, size
    while low < high:
        # Get the middle rank (euclidean division)
        mid = (low + high) // 2
        # Compare only the first len(pattern) characters of the suffix
        if text[ranks[mid]:ranks[mid] + width] < pattern:
            # mid is too small and has already been tried: skip it
            low = mid + 1
        else:
            # mid may be the first match: keep it inside the window
            high = mid
    # Here we have the beginning of the range
    beginning = low

    # Find the rank just AFTER the last occurrence, i.e. the first rank whose
    # truncated suffix is strictly greater than the pattern. Nothing before
    # ``beginning`` needs to be tried again, so only ``high`` is reset. It is
    # reset to ``size`` (not size - 1) so that the range can extend up to and
    # including the very last suffix array entry.
    high = size
    while low < high:
        mid = (low + high) // 2
        if text[ranks[mid]:ranks[mid] + width] > pattern:
            # mid is already past the matches: it becomes the new upper limit
            high = mid
        else:
            # mid is a match (or smaller): the limit is further right
            low = mid + 1

    # Everything in [beginning, high) starts with the pattern
    return list(ranks[beginning:high])
def main():
    """ The main of TP5: build a suffix array and query it

    Builds the suffix array of "ACACAG", prints it, then prints the result
    of first_occurrence() for "C" and of match() for "CA", followed by the
    suffix of the text starting at each position returned by match().
    """
    # Build and display the index of the demo text
    text_index = sa.SuffixArray("ACACAG")
    print(text_index)

    # Single lookup
    print("First occurrence of C:")
    first_hit = first_occurrence(text_index, "C")
    print(first_hit)

    # Full range lookup
    print("\nAll occurrence of CA:")
    hits = match(text_index, "CA")
    report = "Position in the suffix array:\n{}\n\nCorresponding suffixes: "
    print(report.format(hits))

    # Show the suffix that starts at each reported position
    for position in hits:
        print(text_index.string[position:])
# Script entry point: run the TP5 demo as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this file would also run main() and then hit the exit() call below.
main()
# Exit without error (status code 0).
# NOTE(review): `exit` is the helper installed by the `site` module for
# interactive use; `sys.exit` is the usual choice in scripts - confirm
# before changing.
exit(0)
# Always put one extra return line
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment