restriction.py 2.2 KB
Newer Older
1
2
from operator import itemgetter

Bertrand  NÉRON's avatar
Bertrand NÉRON committed
3
# An enzyme is implemented as a tuple with the following structure:
# (name, comment, sequence, cut, end)
#    0     1         2       3    4

7

Bertrand  NÉRON's avatar
Bertrand NÉRON committed
8
def one_enz_one_binding_site(dna, enzyme):
    """
    Find the first binding site of one enzyme in a dna sequence.

    :param dna: the dna sequence to search the enzyme binding site in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: the first position of the enzyme binding site in dna,
             or None if there is none
    :rtype: int or None
    """
    pos = dna.find(enzyme[2])
    # str.find reports "not found" as -1; position 0 is a valid hit, so the
    # comparison must be against -1 and not a truthiness test.
    return pos if pos != -1 else None

        
Bertrand  NÉRON's avatar
Bertrand NÉRON committed
18
def one_enz_all_binding_sites(dna, enzyme):
    """
    Find every binding site of one enzyme in a dna sequence.

    Overlapping sites are reported: the search restarts one character
    after each hit.

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: all positions of the enzyme binding sites in dna,
             in increasing order (empty list if there is none)
    :rtype: list of int
    """
    site = enzyme[2]  # the recognition sequence does not change in the loop
    positions = []
    pos = dna.find(site)
    while pos != -1:
        positions.append(pos)
        # restart just after the start of the previous hit so that
        # overlapping occurrences are found too
        pos = dna.find(site, pos + 1)
    return positions


35
36
37
38
39
def one_enz_all_binding_sites2(dna, enzyme):
    """
    Find every binding site of one enzyme in a dna sequence.

    Alternative implementation that keeps an explicit restart offset.
    All reported positions are absolute indexes in ``dna``; overlapping
    sites are reported.

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: all positions of the enzyme binding sites in dna,
             in increasing order (empty list if there is none)
    :rtype: list of int
    """
    site = enzyme[2]
    positions = []
    start = 0  # absolute index in dna from which the next search begins
    while True:
        # Searching with a start offset (instead of slicing dna) keeps the
        # result in absolute coordinates and avoids copying the sequence.
        pos = dna.find(site, start)
        if pos == -1:
            return positions
        positions.append(pos)
        start = pos + 1


57
58
59
def binding_sites(dna, enzymes):
    """
    Return all positions of all enzymes binding sites present in dna,
    sorted by increasing position.

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzymes: the enzymes to look for
    :type enzymes: a sequence of enzyme tuples
                   (str name, str comment, str sequence, int cut, str end)
    :return: one (enzyme name, position) pair per binding site found in dna,
             ordered by position
    :rtype: list of tuple (str, int)
    """
    positions = []
    for enzyme in enzymes:
        name = enzyme[0]
        positions.extend(
            (name, pos) for pos in one_enz_all_binding_sites(dna, enzyme)
        )
    # list.sort is stable: sites at the same position keep the order in
    # which their enzymes were given.
    positions.sort(key=itemgetter(1))
    return positions