d_graph.py 5.72 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.center = center
12
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
13
14
        self.halves = [None,None]
        self.connexity = [None,None]
15
        self.nodes = [center]
16
17
18
19
20
21
22
23
24
25
26
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
27
        self.nodes = sorted([self.center] + h1 + h2)
Yoann Dufresne's avatar
Yoann Dufresne committed
28
29
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
30
31
32
33
34
35
36
37

        # Compute link arities
        for node1 in h1:
            neighbors = set(graph.neighbors(node1))

            for node2 in h2:
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
38
39
40
41
42
43
44
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
45
46


47
    def get_link_divergence(self):
48
49
50
51
52
53
54
55
        return abs((self.score / self.get_optimal_score()) - 1)


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


Yoann Dufresne's avatar
Yoann Dufresne committed
56
57
58
59
    def to_list(self):
        return self.halves[0]+ [self.center] + self.halves[1] 


60
61
62
63
64
65
66
67
68
69
70
71
72
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
73
74


Yoann Dufresne's avatar
Yoann Dufresne committed
75
76
    def to_node_multiset(self):
        return frozenset(self.to_list())
77
78


79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
    def distance_to(self, dgraph):
        other_nodes = dgraph.nodes

        dist = 0
        idx1, idx2 = 0, 0
        while(idx1 != len(self.nodes) and idx2 != len(other_nodes)):
            if self.nodes[idx1] == other_nodes[idx2]:
                idx1 += 1
                idx2 += 1
            else:
                dist += 1
                if self.nodes[idx1] < other_nodes[idx2]:
                    idx1 += 1
                else:
                    idx2 += 1
        dist += len(self.nodes) - idx1 + len(other_nodes) - idx2

        return dist


99
    def __eq__(self, other):
Yoann Dufresne's avatar
Yoann Dufresne committed
100
        return self.to_ordered_lists() == other.to_ordered_lists()
101
102
103
104
105

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
106
107
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
108
109
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
110

111
    def __hash__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
112
        nodelist = list(self.to_list())
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        return int(min(fwd_hash, rev_hash))


131
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
132
133
134
135
136
        # print(self.halves)
        representation = self.center + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
137
138
139
140
141
142
143


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @param n_best Only keep n d-graphs (the nearest to 1.0 ratio)
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
144
def compute_all_max_d_graphs(graph, n_best=10):
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
    d_graphes = {}

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

        node_d_graphes = []
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
161
                d_graph = Dgraph(node)
162
163
164
165
166
167
168
169
170
171
172
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
                optimal_score = d_graph.get_optimal_score()
                # For a minimal connection         To avoid too much shared nodes
                if d_graph.score < optimal_score / 2 or d_graph.score >= 1.5 * optimal_score:
                    continue
                
                node_d_graphes.append(d_graph)

        # Cut the the distribution queue

Yoann Dufresne's avatar
Yoann Dufresne committed
173
174
        d_graphes[node] = sorted(node_d_graphes)[:n_best]
        # print(node_d_graphes)
175
176

    return d_graphes