d_graph.py 5.23 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.center = center
12
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
13
14
        self.halves = [None,None]
        self.connexity = [None,None]
15
16
17
18
19
20
21
22
23
24
25
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
Yoann Dufresne's avatar
Yoann Dufresne committed
26
27
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
28
29
30
31
32
33
34
35

        # Compute link arities
        for node1 in h1:
            neighbors = set(graph.neighbors(node1))

            for node2 in h2:
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
36
37
38
39
40
41
42
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
43
44


45
    def get_link_divergence(self):
46
47
48
49
50
51
52
53
        return abs((self.score / self.get_optimal_score()) - 1)


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


54
55
56
57
58
59
60
61
62
63
64
65
66
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
67
68


69
70
71
72
    def to_node_set(self):
        return frozenset(self.halves[0] + self.halves[1] + [self.center])


73
    def __eq__(self, other):
74
75
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
76
77
78
79
80
81
        return (my_tuple == other_tuple)

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
82
83
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
84
85
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
86

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    def __hash__(self):
        nodelist = list(self.to_node_set())
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        # print()
        # print(fwd_str)
        # print(rev_str)

        return int(min(fwd_hash, rev_hash))


111
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
112
113
114
115
116
        # print(self.halves)
        representation = self.center + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
117
118
119
120
121
122
123


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @param n_best Only keep n d-graphs (the nearest to 1.0 ratio)
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
124
def compute_all_max_d_graphs(graph, n_best=10):
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
    d_graphes = {}

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

        node_d_graphes = []
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
141
                d_graph = Dgraph(node)
142
143
144
145
146
147
148
149
150
151
152
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
                optimal_score = d_graph.get_optimal_score()
                # For a minimal connection         To avoid too much shared nodes
                if d_graph.score < optimal_score / 2 or d_graph.score >= 1.5 * optimal_score:
                    continue
                
                node_d_graphes.append(d_graph)

        # Cut the the distribution queue

Yoann Dufresne's avatar
Yoann Dufresne committed
153
154
        d_graphes[node] = sorted(node_d_graphes)[:n_best]
        # print(node_d_graphes)
155
156

    return d_graphes