from functools import total_ordering @total_ordering class Dgraph(object): """docstring for Dgraph""" def __init__(self, center): super(Dgraph, self).__init__() self.idx = -1 self.center = center self.score = 0 self.halves = [[], []] self.connexity = [{}, {}] self.nodes = [self.center] self.node_set = set(self.nodes) self.ordered_list = None self.sorted_list = None self.edges = set() @staticmethod def load(lcp_txt, score, variables): """ Static method to load a dgraph from a text :param lcp_txt: the saved d-graph :return: a new d-graph object corresponding to the test """ # basic split lcp_txt = lcp_txt.replace(']', '') _, head, h1, h2 = lcp_txt.split('[') # Head parsing center = head.replace(' ', '') dg = Dgraph(center) # Reload halves h1 = [x for x in h1.split(',')] h2 = [x for x in h2.split(',')] dg.halves[0] = h1 dg.node_set.update(h1) dg.nodes.extend(h1) dg.halves[1] = h2 dg.node_set.update(h2) dg.nodes.extend(h2) # Score parsing dg.score = int(score.split('/')[0]) # covering variable loading dg.edges = {int(x) for x in variables.split(' ')} return dg def put_halves(self, h1, h2, graph): """ Compute the d-graph quality (score) according to the connectivity between the two halves. :param h1: First half of the d-graph :param h2: Second half of the d-graph :param graph: The barcode graph """ self.score = 0 # First half self.halves[0] = h1 self.node_set.update(h1) self.nodes.extend(h1) self.connexity[0] = {key: 0 for key in h1} # second half self.halves[1] = h2 self.node_set.update(h2) self.nodes.extend(h2) self.connexity[1] = {key: 0 for key in h2} # Compute link arities for node1 in self.halves[0]: neighbors = set(graph.neighbors(node1)) for node2 in self.halves[1]: if node1 == node2 or node2 in neighbors: self.score += 1 self.connexity[0][node1] += 1 self.connexity[1][node2] += 1 # Compute covering variable subgraph = graph.subgraph(self.nodes) for _, _, id in subgraph.edges.data("uniq_idx"): self.edges.add(id) def get_link_divergence(self): return int(abs(self.score - self.get_optimal_score())) def get_optimal_score(self): max_len = max(len(self.halves[0]), len(self.halves[1])) return int(max_len * (max_len - 1) / 2) def to_sorted_list(self): if self.sorted_list is None: self.sorted_list = sorted(self.nodes) return self.sorted_list def to_ordered_lists(self): if self.ordered_list is None: hands = [[], []] for idx in range(2): prev_connectivity = -1 for node in self.halves[idx]: # group nodes by similar connectivity value = self.connexity[idx][node] if value != prev_connectivity: hands[idx].append([]) prev_connectivity = value hands[idx][-1].append(node) self.ordered_list = hands[0][::-1] + [[self.center]] + hands[1] return self.ordered_list def to_node_set(self): return frozenset(self.to_sorted_list()) def to_uniq_triplet(self): """ Return the triplet (center, left_clique, right_clique) where the cliques are sorted by name. The left clique have a lexicography order smaller than the right one. """ left = sorted(self.halves[0]) left_repr = ",".join(str(node) for node in left) right = sorted(self.halves[1]) right_repr = ",".join(str(node) for node in right) if left_repr > right_repr: save = left left = right right = save return self.center, left, right def distance_to(self, dgraph): nodes_1 = self.to_sorted_list() nodes_2 = other_nodes = dgraph.to_sorted_list() dist = 0 idx1, idx2 = 0, 0 while idx1 != len(nodes_1) and idx2 != len(nodes_2): if nodes_1[idx1] == nodes_2[idx2]: idx1 += 1 idx2 += 1 else: dist += 1 if nodes_1[idx1] < nodes_2[idx2]: idx1 += 1 else: idx2 += 1 dist += len(nodes_1) - idx1 + len(nodes_2) - idx2 return dist """ Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes of dg1 are part of dg2 and the divergeance of dg1 is greater than dg2. @param dg1 (resp dg2) A d_graph object. @return True if dg1 is dominated by dg2. """ def is_dominated(self, dg): dg1_nodes = self.to_node_set() dg2_nodes = dg.to_node_set() # domination first condition: inclusion of all the nodes if not dg1_nodes.issubset(dg2_nodes): return False # domination second condition if len(dg1_nodes) == len(dg2_nodes): if self.get_link_divergence() > dg.get_link_divergence(): return True elif self.get_link_divergence() >= dg.get_link_divergence(): return True return False def __eq__(self, other): if other is None: return False if self.idx != -1 and self.idx == other.idx: return True if self.node_set != other.node_set: return False return self.to_uniq_triplet() == other.to_uniq_triplet() def __ne__(self, other): return not (self == other) def __lt__(self, other): my_tuple = (self.get_link_divergence(), self.get_optimal_score()) other_tuple = (other.get_link_divergence(), other.get_optimal_score()) return my_tuple < other_tuple def __hash__(self): return str(self).__hash__() def __full_repr__(self): # print(self.halves) representation = str(self.center) + " " representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]" representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]" return representation def __repr__(self): c, left, right = self.to_uniq_triplet() return f"[{c}][{','.join(str(x) for x in left)}][{','.join(str(x) for x in right)}]"