d_graph.py 9.47 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.idx = -1
Yoann Dufresne's avatar
Yoann Dufresne committed
12
        self.center = center
13
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
14
15
        self.halves = [None,None]
        self.connexity = [None,None]
16
        self.nodes = [self.center]
Yoann Dufresne's avatar
Yoann Dufresne committed
17
        self.edges = []
18
19
20
21
22
23
24
25
26
27
28
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
29
        self.nodes = sorted([self.center] + self.halves[0] + self.halves[1])
Yoann Dufresne's avatar
Yoann Dufresne committed
30
31
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
Yoann Dufresne's avatar
Yoann Dufresne committed
32
        self.edges = []
33
34

        # Compute link arities
35
        for node1 in self.halves[0]:
36
37
            neighbors = set(graph.neighbors(node1))

38
            for node2 in self.halves[1]:
39
40
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
41
42
43
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

44
45
46
47
        # Compute links from the center to the other nodes
        for idx, node1 in enumerate(self.nodes):
            for node2 in self.nodes[idx+1:]:
                if graph.has_edge(node1, node2):
Yoann Dufresne's avatar
Yoann Dufresne committed
48
49
                    if node1 < node2:
                        self.edges.append((node1, node2))
50
                    else:
Yoann Dufresne's avatar
Yoann Dufresne committed
51
52
                        self.edges.append((node2, node1))

Yoann Dufresne's avatar
Yoann Dufresne committed
53
54
55
56
        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
57
58


59
    def get_link_divergence(self):
60
        return abs(self.score - self.get_optimal_score())
61
62
63
64
65
66
67


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


Yoann Dufresne's avatar
Yoann Dufresne committed
68
69
70
71
    def to_list(self):
        return self.halves[0]+ [self.center] + self.halves[1] 


72
73
74
75
76
77
78
79
80
81
82
83
84
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
85
86


Yoann Dufresne's avatar
Yoann Dufresne committed
87
88
    def to_node_multiset(self):
        return frozenset(self.to_list())
89
90


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    def distance_to(self, dgraph):
        other_nodes = dgraph.nodes

        dist = 0
        idx1, idx2 = 0, 0
        while(idx1 != len(self.nodes) and idx2 != len(other_nodes)):
            if self.nodes[idx1] == other_nodes[idx2]:
                idx1 += 1
                idx2 += 1
            else:
                dist += 1
                if self.nodes[idx1] < other_nodes[idx2]:
                    idx1 += 1
                else:
                    idx2 += 1
        dist += len(self.nodes) - idx1 + len(other_nodes) - idx2

        return dist


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
    """ Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes
    of dg1 are part of dg2 and the divergeance of dg1 is greater than dg2.
    @param dg1 (resp dg2) A d_graph object.
    @return True if dg1 is dominated by dg2.
    """
    def is_dominated(self, dg):
        dg1_nodes = frozenset(self.to_list())
        dg2_nodes = frozenset(dg.to_list())

        # domination first condition: inclusion of all the nodes
        if not dg1_nodes.issubset(dg2_nodes):
            return False

        # domination second condition
        if len(dg1_nodes) == len(dg2_nodes):
126
            if self.get_link_divergence() > dg.get_link_divergence():
127
                return True
128
        elif self.get_link_divergence() >= dg.get_link_divergence():
129
130
131
132
133
            return True

        return False


134
    def __eq__(self, other):
Yoann Dufresne's avatar
Yoann Dufresne committed
135
        return self.to_ordered_lists() == other.to_ordered_lists()
136
137
138
139
140

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
141
142
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
143
144
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
145

146
    def __hash__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
147
        nodelist = list(self.to_list())
148
        nodelist = [str(x) for x in nodelist]
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        return int(min(fwd_hash, rev_hash))


167
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
168
        # print(self.halves)
169
        representation = str(self.center) + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
Yoann Dufresne's avatar
Yoann Dufresne committed
170
171
172
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
173
174
175
176
177
178
179


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @param n_best Only keep n d-graphs (the nearest to 1.0 ratio)
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
180
def compute_all_max_d_graphs(graph, n_best=100, debug=False):
181
    d_graphs = {}
182
183
184
185
186

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

187
        node_d_graphs = []
188
189
190
191
192
193
194
195
196
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
197
                d_graph = Dgraph(node)
198
199
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
200
                if d_graph.get_link_divergence() > d_graph.get_optimal_score() / 2:
201
202
                    continue
                
203
                node_d_graphs.append(d_graph)
204
205
206

        # Cut the the distribution queue

207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
        d_graphs[node] = sorted(node_d_graphs)
        # print(node_d_graphs)

    return d_graphs


""" Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's
    dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and
    all the dominated dg are removed from the list.
    @param dg A new dg to add/filter.
    @param undominated_dgs_list A list of dg where any of them is dominated by another one.
    @return The updated undominated list. 
"""
def add_new_dg_regarding_domination(dg, undominated_dgs_list):
    to_remove = []

    # Search for domination relations
    for u_dg in undominated_dgs_list:
        if len(to_remove) == 0 and dg.is_dominated(u_dg):
            return undominated_dgs_list
        elif u_dg.is_dominated(dg):
            to_remove.append(u_dg)

    # Remove dominated values
    for dg2 in to_remove:
        undominated_dgs_list.remove(dg2)

    # Add the new dg
    undominated_dgs_list.append(dg)

    return undominated_dgs_list


240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255

def filter_dominated(d_graphs, overall=False, in_place=True):
    if not overall:
        return local_domination_filter(d_graphs, in_place)

    all_d_graphs = []
    for dgs in d_graphs.values():
        all_d_graphs.extend(dgs)

    print(len(all_d_graphs))
    all_d_graphs = list_domination_filter(all_d_graphs)
    print(len(all_d_graphs))

    return d_graphs


256
257
258
259
260
261
262
263
""" Filter the d-graphs by node. In a list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter, sorted by central node.
    @param in_place If true, modify the content of d_graph with the filtered version. If False,
    copy all the content in a new dictionnary.
    @return The filtered dictionnary of d-graph per node.
"""
264
def local_domination_filter(d_graphs, in_place=True):
265
266
267
268
    filtered = d_graphs if in_place else {}

    # Filter node by node
    for node, d_graph_list in d_graphs.items():
269
270
        # Add the non filtered d-graph to the output
        filtered[node] = list_domination_filter(d_graph_list)
271

272
    return filtered
273

274
275
276
277
278
279
280
281
282
283
284
285
286

""" Filter the input d-graphs list. In the list of d-graph centered on a node n, if a d-graph is
    completly included in another and have a highest distance score to the optimal, then it is
    filtered out.
    @param d_graphs All the d-graphs to filter.
    @return The filtered dictionnary of d-graph per node.
"""
def list_domination_filter(d_graphs):
    filtered = []

    # Filter d-graph by d-graph
    for dg in d_graphs:
        add_new_dg_regarding_domination(dg, filtered)
287

288
    return filtered