# restriction.py

from operator import itemgetter

# An enzyme is implemented as a tuple with the following structure:
# ("name", "comment", "sequence", "cut", "end")
#    0         1           2        3      4

def one_enz_one_binding_site(dna, enzyme):
    """
    Locate the first binding site of an enzyme in a dna sequence.

    :param dna: the dna sequence to search
    :type dna: str
    :param enzyme: the enzyme to look for; its sequence is read from index 2
    :type enzyme: tuple
    :return: the first position of enzyme binding site in dna,
             or None if there is none
    :rtype: int or None
    """
    site = enzyme[2]
    index = dna.find(site)
    # str.find signals "not found" with -1; expose that as None instead.
    return None if index == -1 else index


def one_enz_all_binding_sites(dna, enzyme):
    """
    Collect every binding site of an enzyme in a dna sequence.

    Overlapping sites are reported too, because each new search starts
    one character after the previous hit.

    :param dna: the dna sequence to search enzyme binding sites
    :type dna: str
    :param enzyme: the enzyme to look for; its sequence is read from index 2
    :type enzyme: tuple
    :return: all positions of enzyme binding sites in dna, in increasing order
    :rtype: list of int
    """
    site = enzyme[2]
    found = []
    start = 0
    while True:
        hit = dna.find(site, start)
        if hit == -1:
            return found
        found.append(hit)
        # Resume just after the start of this hit so overlaps are kept.
        start = hit + 1


def one_enz_all_binding_sites2(dna, enzyme):
    """
    Collect every binding site of an enzyme in a dna sequence.

    Variant that searches the remaining tail of the sequence (a slice)
    after each hit and converts the slice-relative index back into an
    absolute position in ``dna``. Overlapping sites are reported.

    :param dna: the dna sequence to search enzyme binding sites
    :type dna: str
    :param enzyme: the enzyme to look for; its sequence is read from index 2
    :type enzyme: tuple
    :return: all positions of enzyme binding sites in dna, in increasing order
    :rtype: list of int
    """
    site = enzyme[2]
    positions = []
    # Absolute index in dna at which the currently searched slice starts.
    offset = 0
    rel_pos = dna.find(site)
    while rel_pos != -1:
        # rel_pos is relative to the slice: shift it back to dna coordinates.
        positions.append(offset + rel_pos)
        offset += rel_pos + 1
        # Only reachable with an empty site, which matches at every index
        # (even in an empty slice); stop once we ran past the end of dna.
        if offset > len(dna):
            break
        rel_pos = dna[offset:].find(site)
    return positions


def binding_sites(dna, enzymes):
    """
    Return the binding sites of several enzymes in dna, sorted by
    increasing position.

    Each hit is reported as a ``(enzyme name, position)`` pair, where the
    name is the item at index 0 of the enzyme. Hits at the same position
    keep the order in which their enzymes were given (the sort is stable).

    :param dna: the dna sequence to search enzyme binding sites
    :type dna: str
    :param enzymes: the enzymes to look for
    :type enzymes: iterable of enzyme tuples
    :return: all binding sites of each enzyme in dna
    :rtype: list of (str, int) tuples
    """
    hits = []
    for enz in enzymes:
        hits.extend((enz[0], site) for site in one_enz_all_binding_sites(dna, enz))
    return sorted(hits, key=itemgetter(1))