d_graph.py 9.98 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.idx = -1
Yoann Dufresne's avatar
Yoann Dufresne committed
12
        self.center = center
13
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
14
15
        self.halves = [None,None]
        self.connexity = [None,None]
16
        self.nodes = [self.center]
Yoann Dufresne's avatar
Yoann Dufresne committed
17
        self.edges = []
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36


    """ Static method to load a dgraph from a text
        @param text the saved d-graph
        @param barcode_graph Barcode graph from which the d-graph is extracted
        @return a new d-graph object corresponding to the test 
    """
    def load(text, barcode_graph):
        # basic split
        text = text.replace(']', '')
        head, h1, h2 = text.split('[')

        # Head parsing
        head = head.split(' ')
        dg = Dgraph(head[-3])
        if len(head) == 4:
            dg.idx = int(head[0])

        return dg
37
38
39
40
41
42
43
44
45
46
47
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
48
        self.nodes = sorted([self.center] + self.halves[0] + self.halves[1])
Yoann Dufresne's avatar
Yoann Dufresne committed
49
50
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
Yoann Dufresne's avatar
Yoann Dufresne committed
51
        self.edges = []
52
53

        # Compute link arities
54
        for node1 in self.halves[0]:
55
56
            neighbors = set(graph.neighbors(node1))

57
            for node2 in self.halves[1]:
58
59
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
60
61
62
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

63
64
65
66
        # Compute links from the center to the other nodes
        for idx, node1 in enumerate(self.nodes):
            for node2 in self.nodes[idx+1:]:
                if graph.has_edge(node1, node2):
Yoann Dufresne's avatar
Yoann Dufresne committed
67
68
                    if node1 < node2:
                        self.edges.append((node1, node2))
69
                    else:
Yoann Dufresne's avatar
Yoann Dufresne committed
70
71
                        self.edges.append((node2, node1))

Yoann Dufresne's avatar
Yoann Dufresne committed
72
73
74
75
        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
76
77


78
    def get_link_divergence(self):
79
        return abs(self.score - self.get_optimal_score())
80
81
82
83
84
85
86


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


Yoann Dufresne's avatar
Yoann Dufresne committed
87
88
89
90
    def to_list(self):
        return self.halves[0]+ [self.center] + self.halves[1] 


91
92
93
94
95
96
97
98
99
100
101
102
103
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
104
105


Yoann Dufresne's avatar
Yoann Dufresne committed
106
107
    def to_node_multiset(self):
        return frozenset(self.to_list())
108
109


110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
    def distance_to(self, dgraph):
        other_nodes = dgraph.nodes

        dist = 0
        idx1, idx2 = 0, 0
        while(idx1 != len(self.nodes) and idx2 != len(other_nodes)):
            if self.nodes[idx1] == other_nodes[idx2]:
                idx1 += 1
                idx2 += 1
            else:
                dist += 1
                if self.nodes[idx1] < other_nodes[idx2]:
                    idx1 += 1
                else:
                    idx2 += 1
        dist += len(self.nodes) - idx1 + len(other_nodes) - idx2

        return dist


130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
    """ Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes
    of dg1 are part of dg2 and the divergeance of dg1 is greater than dg2.
    @param dg1 (resp dg2) A d_graph object.
    @return True if dg1 is dominated by dg2.
    """
    def is_dominated(self, dg):
        dg1_nodes = frozenset(self.to_list())
        dg2_nodes = frozenset(dg.to_list())

        # domination first condition: inclusion of all the nodes
        if not dg1_nodes.issubset(dg2_nodes):
            return False

        # domination second condition
        if len(dg1_nodes) == len(dg2_nodes):
145
            if self.get_link_divergence() > dg.get_link_divergence():
146
                return True
147
        elif self.get_link_divergence() >= dg.get_link_divergence():
148
149
150
151
152
            return True

        return False


153
    def __eq__(self, other):
Yoann Dufresne's avatar
Yoann Dufresne committed
154
        return self.to_ordered_lists() == other.to_ordered_lists()
155
156
157
158
159

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
160
161
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
162
163
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
164

165
    def __hash__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
166
        nodelist = list(self.to_list())
167
        nodelist = [str(x) for x in nodelist]
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        return int(min(fwd_hash, rev_hash))


186
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
187
        # print(self.halves)
Yoann Dufresne's avatar
Yoann Dufresne committed
188
189
        representation = "" if self.idx == -1 else f"{self.idx} "
        representation += str(self.center) + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
Yoann Dufresne's avatar
Yoann Dufresne committed
190
191
192
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
193
194
195
196
197
198


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
199
def compute_all_max_d_graphs(graph, debug=False):
200
    d_graphs = {}
201
202
203
204
205

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

206
        node_d_graphs = []
207
208
209
210
211
212
213
214
215
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
216
                d_graph = Dgraph(node)
217
218
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
219
                if d_graph.get_link_divergence() > d_graph.get_optimal_score() / 2:
220
221
                    continue
                
222
                node_d_graphs.append(d_graph)
223
224
225

        # Cut the the distribution queue

226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
        d_graphs[node] = sorted(node_d_graphs)
        # print(node_d_graphs)

    return d_graphs


""" Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's
    dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and
    all the dominated dg are removed from the list.
    @param dg A new dg to add/filter.
    @param undominated_dgs_list A list of dg where any of them is dominated by another one.
    @return The updated undominated list. 
"""
def add_new_dg_regarding_domination(dg, undominated_dgs_list):
    to_remove = []

    # Search for domination relations
    for u_dg in undominated_dgs_list:
        if len(to_remove) == 0 and dg.is_dominated(u_dg):
            return undominated_dgs_list
        elif u_dg.is_dominated(dg):
            to_remove.append(u_dg)

    # Remove dominated values
    for dg2 in to_remove:
        undominated_dgs_list.remove(dg2)

    # Add the new dg
    undominated_dgs_list.append(dg)

    return undominated_dgs_list


259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

def filter_dominated(d_graphs, overall=False, in_place=True):
    if not overall:
        return local_domination_filter(d_graphs, in_place)

    all_d_graphs = []
    for dgs in d_graphs.values():
        all_d_graphs.extend(dgs)

    print(len(all_d_graphs))
    all_d_graphs = list_domination_filter(all_d_graphs)
    print(len(all_d_graphs))

    return d_graphs


275
276
277
278
279
280
281
282
""" Filter the d-graphs by node. In a list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter, sorted by central node.
    @param in_place If true, modify the content of d_graph with the filtered version. If False,
    copy all the content in a new dictionnary.
    @return The filtered dictionnary of d-graph per node.
"""
283
def local_domination_filter(d_graphs, in_place=True):
284
285
286
287
    filtered = d_graphs if in_place else {}

    # Filter node by node
    for node, d_graph_list in d_graphs.items():
288
289
        # Add the non filtered d-graph to the output
        filtered[node] = list_domination_filter(d_graph_list)
290

291
    return filtered
292

293
294
295
296
297
298
299
300
301
302
303
304
305

""" Filter the input d-graphs list. In the list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter.
    @return The filtered dictionnary of d-graph per node.
"""
def list_domination_filter(d_graphs):
    filtered = []

    # Filter d-graph by d-graph
    for dg in d_graphs:
        add_new_dg_regarding_domination(dg, filtered)
306

307
    return filtered