d_graph.py 5.16 KB
Newer Older
1
2
3
4
import networkx as nx
import math
from functools import total_ordering

Yoann Dufresne's avatar
Yoann Dufresne committed
5

6
7
8
@total_ordering
class Dgraph(object):
    """docstring for Dgraph"""
Yoann Dufresne's avatar
Yoann Dufresne committed
9
    def __init__(self, center):
10
        super(Dgraph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
11
        self.center = center
12
        self.score = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
13
14
        self.halves = [None,None]
        self.connexity = [None,None]
15
16
17
18
19
20
21
22
23
24
25
        

    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
        @param h1 First half of the d-graph
        @param h2 Second half of the d-graph
        @param graph The connectivity graph
    """
    def put_halves(self, h1, h2, graph):
        self.score = 0
        self.halves[0] = h1
        self.halves[1] = h2
Yoann Dufresne's avatar
Yoann Dufresne committed
26
27
        self.connexity[0] = {key:0 for key in self.halves[0]}
        self.connexity[1] = {key:0 for key in self.halves[1]}
28
29
30
31
32
33
34
35

        # Compute link arities
        for node1 in h1:
            neighbors = set(graph.neighbors(node1))

            for node2 in h2:
                if node1 == node2 or node2 in neighbors:
                    self.score += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
36
37
38
39
40
41
42
                    self.connexity[0][node1] += 1
                    self.connexity[1][node2] += 1

        # Sort the halves by descending connexity
        connex = self.connexity
        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
43
44


45
    def get_link_divergence(self):
46
47
48
49
50
51
52
53
        return abs((self.score / self.get_optimal_score()) - 1)


    def get_optimal_score(self):
        max_len = max(len(self.halves[0]), len(self.halves[1]))
        return max_len * (max_len - 1) / 2


Yoann Dufresne's avatar
Yoann Dufresne committed
54
55
56
57
    def to_list(self):
        return self.halves[0]+ [self.center] + self.halves[1] 


58
59
60
61
62
63
64
65
66
67
68
69
70
    def to_ordered_lists(self):
        hands = [[],[]]
        for idx in range(2):
            prev_connectivity = -1
            for node in self.halves[idx]:
                # group nodes by similar connectivity
                value = self.connexity[idx][node]
                if value != prev_connectivity:
                    hands[idx].append([])
                    prev_connectivity = value
                hands[idx][-1].append(node)

        return hands[0][::-1] + [[self.center]] + hands[1]
Yoann Dufresne's avatar
Yoann Dufresne committed
71
72


Yoann Dufresne's avatar
Yoann Dufresne committed
73
74
    def to_node_multiset(self):
        return frozenset(self.to_list())
75
76


77
    def __eq__(self, other):
Yoann Dufresne's avatar
Yoann Dufresne committed
78
        return self.to_ordered_lists() == other.to_ordered_lists()
79
80
81
82
83

    def __ne__(self, other):
        return not (self == other)

    def __lt__(self, other):
84
85
        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
86
87
        return (my_tuple < other_tuple)

Yoann Dufresne's avatar
Yoann Dufresne committed
88

89
    def __hash__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
90
        nodelist = list(self.to_list())
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
        nodelist.sort()
        return ",".join(nodelist).__hash__()

        
    def __ordered_hash__(self):
        lst = self.to_ordered_lists()

        fwd_uniq_lst = [sorted(l) for l in lst]
        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
        fwd_hash = fwd_str.__hash__()

        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
        rev_hash = rev_str.__hash__()

        # print()
        # print(fwd_str)
        # print(rev_str)

        return int(min(fwd_hash, rev_hash))


113
    def __repr__(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
114
115
116
117
118
        # print(self.halves)
        representation = self.center + " " + str(self.score) + "/" + str(self.get_optimal_score()) + " "
        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
        return representation
119
120
121
122
123
124
125


""" From a barcode graph, compute all the possible max d-graphs by node.
    @param graph A barcode graph
    @param n_best Only keep n d-graphs (the nearest to 1.0 ratio)
    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
Yoann Dufresne's avatar
Yoann Dufresne committed
126
def compute_all_max_d_graphs(graph, n_best=10):
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
    d_graphes = {}

    for node in list(graph.nodes()):
        neighbors = list(graph.neighbors(node))
        neighbors_graph = nx.Graph(graph.subgraph(neighbors))

        node_d_graphes = []
        # Find all the cliques (equivalent to compute all the candidate half d-graph)
        cliques = list(nx.find_cliques(neighbors_graph))

        # Pair halves to create d-graphes
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx+1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
Yoann Dufresne's avatar
Yoann Dufresne committed
143
                d_graph = Dgraph(node)
144
145
146
147
148
149
150
151
152
153
154
                d_graph.put_halves(clq1, clq2, neighbors_graph)
                
                optimal_score = d_graph.get_optimal_score()
                # For a minimal connection         To avoid too much shared nodes
                if d_graph.score < optimal_score / 2 or d_graph.score >= 1.5 * optimal_score:
                    continue
                
                node_d_graphes.append(d_graph)

        # Cut the the distribution queue

Yoann Dufresne's avatar
Yoann Dufresne committed
155
156
        d_graphes[node] = sorted(node_d_graphes)[:n_best]
        # print(node_d_graphes)
157
158

    return d_graphes