from operator import itemgetter

# An enzyme is represented as a tuple with the following structure:
# (name, comment, sequence, cut, end)
#  0     1        2         3    4


def one_enz_one_binding_site(dna, enzyme):
    """
    Find the first binding site of an enzyme in a dna sequence.

    :param dna: the dna sequence to search the enzyme binding site in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: the first position of the enzyme binding site in dna
             or None if there is none
    :rtype: int or None
    """
    pos = dna.find(enzyme[2])
    if pos != -1:
        return pos
    return None


def one_enz_all_binding_sites(dna, enzyme):
    """
    Find every binding site (overlapping ones included) of an enzyme in dna.

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: all positions of the enzyme binding sites in dna,
             in increasing order
    :rtype: list of int
    """
    site = enzyme[2]
    positions = []
    pos = dna.find(site)
    while pos != -1:
        positions.append(pos)
        # Restart one character after the hit so overlapping sites are found.
        pos = dna.find(site, pos + 1)
    return positions


def one_enz_all_binding_sites2(dna, enzyme):
    """
    Slice-based variant of :func:`one_enz_all_binding_sites`.

    The search is repeated on the remainder of the sequence after each hit;
    positions found in the remainder are translated back to coordinates in
    the full dna sequence before being stored.

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzyme: the enzyme to look for
    :type enzyme: a tuple (str name, str comment, str sequence, int cut, str end)
    :return: all positions of the enzyme binding sites in dna,
             in increasing order
    :rtype: list of int
    """
    site = enzyme[2]
    positions = []
    # Index in dna at which the currently searched slice starts.
    offset = 0
    # Bounding the offset guarantees termination even for an empty site,
    # which matches at the start of every slice (including the empty one).
    while offset <= len(dna):
        relative = dna[offset:].find(site)
        if relative == -1:
            break
        # `relative` is counted from the slice start: make it absolute.
        positions.append(offset + relative)
        # Skip a single character so overlapping sites are still reported.
        offset += relative + 1
    return positions


def binding_sites(dna, enzymes):
    """
    Return all binding sites of all enzymes present in dna,
    sorted by increasing position.

    Sites located at the same position keep the order in which their
    enzymes appear in ``enzymes`` (the sort is stable).

    :param dna: the dna sequence to search enzyme binding sites in
    :type dna: str
    :param enzymes: the enzymes to look for
    :type enzymes: a sequence of enzyme tuples
    :return: one (enzyme name, position) pair per binding site found in dna
    :rtype: list of tuple (str, int)
    """
    positions = []
    for enzyme in enzymes:
        name = enzyme[0]
        positions.extend(
            (name, site) for site in one_enz_all_binding_sites(dna, enzyme)
        )
    positions.sort(key=itemgetter(1))
    return positions