d_graph.py 10.2 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.idx = -1
Yoann Dufresne's avatar
Yoann Dufresne committed
12
        self.center = center
13
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
14
15
        self.halves = [None,None]
        self.connexity = [None,None]
16
        self.nodes = [self.center]
Yoann Dufresne's avatar
Yoann Dufresne committed
17
        self.edges = []
18
19
20
21
22
23
24
25
26
27
28
29
30
31


    """ Static method to load a dgraph from a text
        @param text the saved d-graph
        @param barcode_graph Barcode graph from which the d-graph is extracted
        @return a new d-graph object corresponding to the test 
    """
    def load(text, barcode_graph):
        # basic split
        text = text.replace(']', '')
        head, h1, h2 = text.split('[')

        # Head parsing
        head = head.split(' ')
32
        dg = Dgraph(int(head[-3]))
33
34
35
        if len(head) == 4:
            dg.idx = int(head[0])

36
37
38
39
40
        # Reload halves
        h1 = [int(x.split(' ')[-2]) for x in h1.split(',')]
        h2 = [int(x.split(' ')[-2]) for x in h2.split(',')]
        dg.put_halves(h1, h2, barcode_graph)

41
        return dg
42
43
44
45
46
47
48
49
50
51
52
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
53
        self.nodes = sorted([self.center] + self.halves[0] + self.halves[1])
Yoann Dufresne's avatar
Yoann Dufresne committed
54
55
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
Yoann Dufresne's avatar
Yoann Dufresne committed
56
        self.edges = []
57
58

        # Compute link arities
59
        for node1 in self.halves[0]:
60
61
            neighbors = set(graph.neighbors(node1))

62
            for node2 in self.halves[1]:
63
64
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
65
66
67
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

68
69
70
71
        # Compute links from the center to the other nodes
        for idx, node1 in enumerate(self.nodes):
            for node2 in self.nodes[idx+1:]:
                if graph.has_edge(node1, node2):
Yoann Dufresne's avatar
Yoann Dufresne committed
72
73
                    if node1 < node2:
                        self.edges.append((node1, node2))
74
                    else:
Yoann Dufresne's avatar
Yoann Dufresne committed
75
76
                        self.edges.append((node2, node1))

Yoann Dufresne's avatar
Yoann Dufresne committed
77
78
79
80
        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
81
82


83
    def get_link_divergence(self):
84
        return abs(self.score - self.get_optimal_score())
85
86
87
88
89
90
91


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


Yoann Dufresne's avatar
Yoann Dufresne committed
92
93
94
95
    def to_list(self):
        return self.halves[0]+ [self.center] + self.halves[1] 


96
97
98
99
100
101
102
103
104
105
106
107
108
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
109
110


Yoann Dufresne's avatar
Yoann Dufresne committed
111
112
    def to_node_multiset(self):
        return frozenset(self.to_list())
113
114


115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    def distance_to(self, dgraph):
        other_nodes = dgraph.nodes

        dist = 0
        idx1, idx2 = 0, 0
        while(idx1 != len(self.nodes) and idx2 != len(other_nodes)):
            if self.nodes[idx1] == other_nodes[idx2]:
                idx1 += 1
                idx2 += 1
            else:
                dist += 1
                if self.nodes[idx1] < other_nodes[idx2]:
                    idx1 += 1
                else:
                    idx2 += 1
        dist += len(self.nodes) - idx1 + len(other_nodes) - idx2

        return dist


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
    """ Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes
    of dg1 are part of dg2 and the divergeance of dg1 is greater than dg2.
    @param dg1 (resp dg2) A d_graph object.
    @return True if dg1 is dominated by dg2.
    """
    def is_dominated(self, dg):
        dg1_nodes = frozenset(self.to_list())
        dg2_nodes = frozenset(dg.to_list())

        # domination first condition: inclusion of all the nodes
        if not dg1_nodes.issubset(dg2_nodes):
            return False

        # domination second condition
        if len(dg1_nodes) == len(dg2_nodes):
150
            if self.get_link_divergence() > dg.get_link_divergence():
151
                return True
152
        elif self.get_link_divergence() >= dg.get_link_divergence():
153
154
155
156
157
            return True

        return False


158
    def __eq__(self, other):
Yoann Dufresne's avatar
Yoann Dufresne committed
159
        return self.to_ordered_lists() == other.to_ordered_lists()
160
161
162
163
164

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
165
166
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
167
168
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
169

170
    def __hash__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
171
        nodelist = list(self.to_list())
172
        nodelist = [str(x) for x in nodelist]
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        return int(min(fwd_hash, rev_hash))


191
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
192
        # print(self.halves)
Yoann Dufresne's avatar
Yoann Dufresne committed
193
194
        representation = "" if self.idx == -1 else f"{self.idx} "
        representation += str(self.center) + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
Yoann Dufresne's avatar
Yoann Dufresne committed
195
196
197
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
198
199
200
201
202
203


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
204
def compute_all_max_d_graphs(graph, debug=False):
205
    d_graphs = {}
206
207
208
209
210

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

211
        node_d_graphs = []
212
213
214
215
216
217
218
219
220
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
221
                d_graph = Dgraph(node)
222
223
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
224
                if d_graph.get_link_divergence() > d_graph.get_optimal_score() / 2:
225
226
                    continue
                
227
                node_d_graphs.append(d_graph)
228
229
230

        # Cut the the distribution queue

231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
        d_graphs[node] = sorted(node_d_graphs)
        # print(node_d_graphs)

    return d_graphs


""" Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's
    dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and
    all the dominated dg are removed from the list.
    @param dg A new dg to add/filter.
    @param undominated_dgs_list A list of dg where any of them is dominated by another one.
    @return The updated undominated list. 
"""
def add_new_dg_regarding_domination(dg, undominated_dgs_list):
    to_remove = []

    # Search for domination relations
    for u_dg in undominated_dgs_list:
        if len(to_remove) == 0 and dg.is_dominated(u_dg):
            return undominated_dgs_list
        elif u_dg.is_dominated(dg):
            to_remove.append(u_dg)

    # Remove dominated values
    for dg2 in to_remove:
        undominated_dgs_list.remove(dg2)

    # Add the new dg
    undominated_dgs_list.append(dg)

    return undominated_dgs_list


264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

def filter_dominated(d_graphs, overall=False, in_place=True):
    if not overall:
        return local_domination_filter(d_graphs, in_place)

    all_d_graphs = []
    for dgs in d_graphs.values():
        all_d_graphs.extend(dgs)

    print(len(all_d_graphs))
    all_d_graphs = list_domination_filter(all_d_graphs)
    print(len(all_d_graphs))

    return d_graphs


280
281
282
283
284
285
286
287
""" Filter the d-graphs by node. In a list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter, sorted by central node.
    @param in_place If true, modify the content of d_graph with the filtered version. If False,
    copy all the content in a new dictionnary.
    @return The filtered dictionnary of d-graph per node.
"""
288
def local_domination_filter(d_graphs, in_place=True):
289
290
291
292
    filtered = d_graphs if in_place else {}

    # Filter node by node
    for node, d_graph_list in d_graphs.items():
293
294
        # Add the non filtered d-graph to the output
        filtered[node] = list_domination_filter(d_graph_list)
295

296
    return filtered
297

298
299
300
301
302
303
304
305
306
307
308
309
310

""" Filter the input d-graphs list. In the list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter.
    @return The filtered dictionnary of d-graph per node.
"""
def list_domination_filter(d_graphs):
    filtered = []

    # Filter d-graph by d-graph
    for dg in d_graphs:
        add_new_dg_regarding_domination(dg, filtered)
311

312
    return filtered