lcp_graph.py 6.22 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import itertools
Yoann Dufresne's avatar
Yoann Dufresne committed
3
from bidict import bidict
4
import sys
5

Yoann Dufresne's avatar
Yoann Dufresne committed
6
7
# from deconvolution.lcp.FixedDGIndex import FixedDGIndex
from deconvolution.lcp.VariableDGIndex import VariableDGIndex
Yoann Dufresne's avatar
Yoann Dufresne committed
8
from deconvolution.lcp.lcp import Lcp
Yoann Dufresne's avatar
Yoann Dufresne committed
9
10
from deconvolution.lcp.CliqueLcpFactory import CliqueDGFactory
from deconvolution.lcp.LouvainDGFactory import LouvainDGFactory
Yoann Dufresne's avatar
Yoann Dufresne committed
11
12


Yoann Dufresne's avatar
Yoann Dufresne committed
13
14
class LcpGraph(nx.Graph):
    """LcpGraph"""
15
    def __init__(self, debug=False, debug_path='.'):
Yoann Dufresne's avatar
Yoann Dufresne committed
16
        super(LcpGraph, self).__init__()
17
        self.all_lcps = []
Yoann Dufresne's avatar
Yoann Dufresne committed
18
        self.lcp_per_node = {}
Yoann Dufresne's avatar
Yoann Dufresne committed
19
        self.node_by_idx = {}
20
        self.barcode_graph = None
21
22
        self.index = None

23
        self.variables = set()
Yoann Dufresne's avatar
Yoann Dufresne committed
24
        self.variables_per_lcp = {}
Yoann Dufresne's avatar
Yoann Dufresne committed
25

26
        self.barcode_edge_idxs = {}
27
        self.nb_uniq_edge = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
28

29
30
31
        self.debug = debug
        self.debug_path = debug_path

32

33
34
35
    """ Redefine subgraph to avoid errors type instantiation errors.
    """
    def subgraph(self, nodes):
Yoann Dufresne's avatar
Yoann Dufresne committed
36
37
        nodes = frozenset(nodes)

Yoann Dufresne's avatar
Yoann Dufresne committed
38
        G = LcpGraph(self.barcode_graph)
Yoann Dufresne's avatar
Yoann Dufresne committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
        G.barcode_edge_idxs = self.barcode_edge_idxs

        # Add sub-nodes
        for node in nodes:
            G.add_node(node)
            G.nodes[node].update(self.nodes[node])

        # Add edges
        for node1, node2, data in self.edges(data=True):
            if node1 in nodes and node2 in nodes:
                G.add_edge(node1, node2, distance=data["distance"])

        # Node by idx
        G.node_by_idx = self.node_by_idx

54
55
        G.variables = self.variables.copy()

Yoann Dufresne's avatar
Yoann Dufresne committed
56
        return G
57

Yoann Dufresne's avatar
Yoann Dufresne committed
58
59
60
    def clone(self):
        return self.subgraph(list(self.nodes()))

61

62
63
64
65
66
67
68
69
70
71
72
73
74
    def construct_from_barcodes(self, barcode_graph, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1):
        self.barcode_graph = barcode_graph

        # Number the edges from original graph
        for idx, edge in enumerate(self.barcode_graph.edges()):
            if edge == (edge[1], edge[0]):
                self.nb_uniq_edge += 1
            if edge in self.barcode_edge_idxs:
                print("Edge already present")
            self.barcode_edge_idxs[edge] = idx
            self.barcode_edge_idxs[(edge[1], edge[0])] = idx
            self.barcode_graph[edge[0]][edge[1]]["uniq_idx"] = idx

Yoann Dufresne's avatar
Yoann Dufresne committed
75
        # Compute all the d-graphs
76
        if verbose:
77
            print("Computing the unit d-graphs..")
Yoann Dufresne's avatar
Yoann Dufresne committed
78
        lcp_factory = None
Yoann Dufresne's avatar
Yoann Dufresne committed
79
        if clique_mode == "louvain":
Yoann Dufresne's avatar
Yoann Dufresne committed
80
            lcp_factory = LouvainDGFactory(self.barcode_graph)
Yoann Dufresne's avatar
Yoann Dufresne committed
81
        else:
Yoann Dufresne's avatar
Yoann Dufresne committed
82
            lcp_factory = CliqueDGFactory(self.barcode_graph, min_size_clique=min_size_clique, debug=self.debug, debug_path=self.debug_path)
Yoann Dufresne's avatar
Yoann Dufresne committed
83
        self.lcp_per_node = lcp_factory.generate_all_lcps(threads=threads, verbose=verbose)
84
        if verbose:
Yoann Dufresne's avatar
Yoann Dufresne committed
85
            counts = sum(len(x) for x in self.lcp_per_node.values())
86
            print(f"\t {counts} computed d-graphs")
Yoann Dufresne's avatar
Yoann Dufresne committed
87
        for d_graphs in self.lcp_per_node.values():
88
            self.all_lcps.extend(d_graphs)
Yoann Dufresne's avatar
Yoann Dufresne committed
89
90

        # Number the d_graphs
91
        for idx, d_graph in enumerate(self.all_lcps):
Yoann Dufresne's avatar
Yoann Dufresne committed
92
            d_graph.idx = idx
93
            self.node_by_idx[idx] = d_graph
94

95
        if verbose:
96
            print("Compute the graph")
97
        # Create the graph
Yoann Dufresne's avatar
Yoann Dufresne committed
98
        self.bidict_nodes = self.create_graph_from_node_neighborhoods(neighbor_threshold)
Yoann Dufresne's avatar
Yoann Dufresne committed
99
100


Yoann Dufresne's avatar
Yoann Dufresne committed
101
    def get_lcp(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
102
103
104
105
106
        if type(obj) == str:
            obj = int(obj)
        if type(obj) == int:
            obj = self.node_by_idx[obj]
        return obj
Yoann Dufresne's avatar
Yoann Dufresne committed
107

Yoann Dufresne's avatar
Yoann Dufresne committed
108
    def get_covering_variables(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
109
        lcp = self.get_lcp(obj)
Yoann Dufresne's avatar
Yoann Dufresne committed
110
111
        if lcp not in self.variables_per_lcp:
            variables = []
112
113
            for e_idx in lcp.edges:
                variables.append(e_idx)
Yoann Dufresne's avatar
Yoann Dufresne committed
114
            self.variables_per_lcp[lcp] = variables
Yoann Dufresne's avatar
Yoann Dufresne committed
115

Yoann Dufresne's avatar
Yoann Dufresne committed
116
        return self.variables_per_lcp[lcp]
Yoann Dufresne's avatar
Yoann Dufresne committed
117
118


119
120
    def load(self, filename):
        # Reload the graph
121
        G = nx.read_gexf(filename)
122
        for node, attrs in G.nodes(data=True):
123
124
            self.add_node(node)
            self.nodes[node].update(attrs)
125
126
        for edge in G.edges(data=True):
            self.add_edge(edge[0], edge[1], distance=edge[2]["distance"])
127
128

        # Extract d-graphs from nx graph
Yoann Dufresne's avatar
Yoann Dufresne committed
129
        # self.node_by_name = {}
130
        self.bidict_nodes = {}
131
132
        for idx, node in enumerate(self.nodes(data=True)):
            node, data = node
133
134
135
136
137
138
139
            lcp = Lcp.load(data["udg"], data["score"], data["barcode_edges"])
            self.variables.update(lcp.edges)
            self.bidict_nodes[node] = lcp
            self.all_lcps.append(lcp)
            if lcp.idx == -1:
                lcp.idx = int(node)
            self.node_by_idx[lcp.idx] = lcp
Yoann Dufresne's avatar
Yoann Dufresne committed
140
            # self.node_by_name[node] = lcp
141
        self.bidict_nodes = bidict(self.bidict_nodes)
142

143

144
145
146
    def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
        nodes = {}

Yoann Dufresne's avatar
Yoann Dufresne committed
147
        # Create the nodes of lcpg from udgs
148
        for lcp in self.all_lcps:
Yoann Dufresne's avatar
Yoann Dufresne committed
149
150
            nodes[lcp] = lcp.idx
            self.add_node(nodes[lcp])
151
            # Add covering barcode edges
Yoann Dufresne's avatar
Yoann Dufresne committed
152
153
154
155
156
            barcode_edges = " ".join([str(x) for x in lcp.edges])
            self.nodes[nodes[lcp]]["barcode_edges"] = barcode_edges
            self.nodes[nodes[lcp]]["score"] = f"{lcp.score}/{lcp.get_optimal_score()}"
            self.nodes[nodes[lcp]]["udg"] = str(lcp)
            self.nodes[nodes[lcp]]["central_node_barcode"] = str(lcp.center)
157
158

        # Create the edges from neighbor edges
159
        for lcp in self.all_lcps:
Yoann Dufresne's avatar
Yoann Dufresne committed
160
161
            for node in lcp.to_node_set():
                if node == lcp.center:
162
163
                    continue
                entry = frozenset({node})
Yoann Dufresne's avatar
Yoann Dufresne committed
164
165
166
167
168
                if entry in self.lcp_per_node:
                    colliding_dgs = self.lcp_per_node[entry]
                    for colliding_lcp in colliding_dgs:
                        distance = lcp.distance_to(colliding_lcp)
                        distance_ratio = distance / (len(lcp.nodes) + len(colliding_lcp.nodes))
169
                        if distance_ratio <= neighborhood_threshold:
Yoann Dufresne's avatar
Yoann Dufresne committed
170
                            self.add_edge(nodes[lcp], nodes[colliding_lcp], distance=distance)
171
172
173
174
175
176
177
178
179

        # Filter out singletons
        graph_nodes = list(nodes)
        for n in graph_nodes:
            if len(list(self.neighbors(nodes[n]))) == 0:
                self.remove_node(nodes[n])
                del nodes[n]

        return bidict(nodes)