d2_graph.py 8.47 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import itertools
Yoann Dufresne's avatar
Yoann Dufresne committed
3
from bidict import bidict
4
import sys
5

6
# from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
Yoann Dufresne's avatar
Yoann Dufresne committed
7
8
9
10
from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
from deconvolution.dgraph.d_graph import Dgraph
from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
Yoann Dufresne's avatar
Yoann Dufresne committed
11
12


13
class D2Graph(nx.Graph):
Yoann Dufresne's avatar
Yoann Dufresne committed
14
    """D2Graph (read it (d-graph)²)"""
15
    def __init__(self, debug=False, debug_path='.'):
Yoann Dufresne's avatar
Yoann Dufresne committed
16
        super(D2Graph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
17
18
19
        self.all_d_graphs = []
        self.d_graphs_per_node = {}
        self.node_by_idx = {}
20
        self.barcode_graph = None
21
22
        self.index = None

23
        self.variables = set()
Yoann Dufresne's avatar
Yoann Dufresne committed
24
        self.variables_per_lcp = {}
Yoann Dufresne's avatar
Yoann Dufresne committed
25

26
        self.barcode_edge_idxs = {}
27
        self.nb_uniq_edge = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
28

29
30
31
        self.debug = debug
        self.debug_path = debug_path

32

33
34
35
    """ Redefine subgraph to avoid errors type instantiation errors.
    """
    def subgraph(self, nodes):
Yoann Dufresne's avatar
Yoann Dufresne committed
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
        nodes = frozenset(nodes)

        G = D2Graph(self.barcode_graph)
        G.barcode_edge_idxs = self.barcode_edge_idxs

        # Add sub-nodes
        for node in nodes:
            G.add_node(node)
            G.nodes[node].update(self.nodes[node])

        # Add edges
        for node1, node2, data in self.edges(data=True):
            if node1 in nodes and node2 in nodes:
                G.add_edge(node1, node2, distance=data["distance"])

        # Node by idx
        G.node_by_idx = self.node_by_idx

54
55
        G.variables = self.variables.copy()

Yoann Dufresne's avatar
Yoann Dufresne committed
56
        return G
57

Yoann Dufresne's avatar
Yoann Dufresne committed
58
59
60
    def clone(self):
        return self.subgraph(list(self.nodes()))

61

62
63
64
65
66
67
68
69
70
71
72
73
74
    def construct_from_barcodes(self, barcode_graph, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1):
        self.barcode_graph = barcode_graph

        # Number the edges from original graph
        for idx, edge in enumerate(self.barcode_graph.edges()):
            if edge == (edge[1], edge[0]):
                self.nb_uniq_edge += 1
            if edge in self.barcode_edge_idxs:
                print("Edge already present")
            self.barcode_edge_idxs[edge] = idx
            self.barcode_edge_idxs[(edge[1], edge[0])] = idx
            self.barcode_graph[edge[0]][edge[1]]["uniq_idx"] = idx

Yoann Dufresne's avatar
Yoann Dufresne committed
75
        # Compute all the d-graphs
76
        if verbose:
77
            print("Computing the unit d-graphs..")
Yoann Dufresne's avatar
Yoann Dufresne committed
78
79
80
81
        dg_factory = None
        if clique_mode == "louvain":
            dg_factory = LouvainDGFactory(self.barcode_graph)
        else:
82
83
            dg_factory = CliqueDGFactory(self.barcode_graph, min_size_clique=min_size_clique, debug=self.debug, debug_path=self.debug_path)
        self.d_graphs_per_node = dg_factory.generate_all_dgraphs(threads=threads, verbose=verbose)
84
85
86
        if verbose:
            counts = sum(len(x) for x in self.d_graphs_per_node.values())
            print(f"\t {counts} computed d-graphs")
Yoann Dufresne's avatar
Yoann Dufresne committed
87
88
89
90
91
92
        for d_graphs in self.d_graphs_per_node.values():
            self.all_d_graphs.extend(d_graphs)

        # Number the d_graphs
        for idx, d_graph in enumerate(self.all_d_graphs):
            d_graph.idx = idx
93
            self.node_by_idx[idx] = d_graph
94

95
        if verbose:
96
            print("Compute the graph")
97
        # Create the graph
Yoann Dufresne's avatar
Yoann Dufresne committed
98
        self.bidict_nodes = self.create_graph_from_node_neighborhoods(neighbor_threshold)
Yoann Dufresne's avatar
Yoann Dufresne committed
99
100


Yoann Dufresne's avatar
Yoann Dufresne committed
101
    def get_lcp(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
102
103
104
105
106
        if type(obj) == str:
            obj = int(obj)
        if type(obj) == int:
            obj = self.node_by_idx[obj]
        return obj
Yoann Dufresne's avatar
Yoann Dufresne committed
107

Yoann Dufresne's avatar
Yoann Dufresne committed
108
    def get_covering_variables(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
109
        lcp = self.get_lcp(obj)
Yoann Dufresne's avatar
Yoann Dufresne committed
110
111
        if lcp not in self.variables_per_lcp:
            variables = []
112
113
            for e_idx in lcp.edges:
                variables.append(e_idx)
Yoann Dufresne's avatar
Yoann Dufresne committed
114
            self.variables_per_lcp[lcp] = variables
Yoann Dufresne's avatar
Yoann Dufresne committed
115

Yoann Dufresne's avatar
Yoann Dufresne committed
116
        return self.variables_per_lcp[lcp]
Yoann Dufresne's avatar
Yoann Dufresne committed
117
118


119
120
    def load(self, filename):
        # Reload the graph
121
        G = nx.read_gexf(filename)
122
        for node, attrs in G.nodes(data=True):
123
124
            self.add_node(node)
            self.nodes[node].update(attrs)
125
126
        for edge in G.edges(data=True):
            self.add_edge(edge[0], edge[1], distance=edge[2]["distance"])
127
128

        # Extract d-graphs from nx graph
Yoann Dufresne's avatar
Yoann Dufresne committed
129
        # self.node_by_name = {}
130
        self.bidict_nodes = {}
131
132
        for idx, node in enumerate(self.nodes(data=True)):
            node, data = node
Yoann Dufresne's avatar
Yoann Dufresne committed
133
            dg = Dgraph.load(data["udg"], data["score"], data["barcode_edges"])
134
            self.variables.update(dg.edges)
135
            self.bidict_nodes[node] = dg
136
137
            self.all_d_graphs.append(dg)
            if dg.idx == -1:
Yoann Dufresne's avatar
Yoann Dufresne committed
138
                dg.idx = int(node)
139
            self.node_by_idx[dg.idx] = dg
Yoann Dufresne's avatar
Yoann Dufresne committed
140
            # self.node_by_name[node] = dg
141
        self.bidict_nodes = bidict(self.bidict_nodes)
142

143

144
    def create_index_from_tuples(self, tuple_size=3, verbose=True):
Yoann Dufresne's avatar
Yoann Dufresne committed
145
        index = {}
146

147
148
149
150
151
152
153
154
        if verbose:
            print("\tIndex d-graphs")
        for lst_idx, dg in enumerate(self.all_d_graphs):
            if verbose:
                sys.stdout.write(f"\r\t{lst_idx+1}/{len(self.all_d_graphs)}")
                sys.stdout.flush()

            nodelist = dg.to_sorted_list()
Yoann Dufresne's avatar
Yoann Dufresne committed
155
156
157
158
159
            if len(nodelist) < tuple_size:
                continue

            # Generate all tuplesize-mers
            for dmer in itertools.combinations(nodelist, tuple_size):
160
161
162
163
164
165
                if dmer not in index:
                    index[dmer] = set()
                index[dmer].add(dg)

        if verbose:
            print()
166
167
168
169

        return index


170
171
172
173
174
175
176
177
    def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
        nodes = {}

        # Create the nodes of d2g from udgs
        for dg in self.all_d_graphs:
            nodes[dg] = dg.idx
            self.add_node(nodes[dg])
            # Add covering barcode edges
178
            barcode_edges = " ".join([str(x) for x in dg.edges])
179
180
181
            self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
            self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
            self.nodes[nodes[dg]]["udg"] = str(dg)
Yoann Dufresne's avatar
Yoann Dufresne committed
182
            self.nodes[nodes[dg]]["central_node_barcode"] = str(dg.center)
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

        # Create the edges from neighbor edges
        for dg in self.all_d_graphs:
            for node in dg.to_node_set():
                if node == dg.center:
                    continue
                entry = frozenset({node})
                if entry in self.d_graphs_per_node:
                    colliding_dgs = self.d_graphs_per_node[entry]
                    for colliding_dg in colliding_dgs:
                        distance = dg.distance_to(colliding_dg)
                        distance_ratio = distance / (len(dg.nodes) + len(colliding_dg.nodes))
                        if distance_ratio <= neighborhood_threshold:
                            self.add_edge(nodes[dg], nodes[colliding_dg], distance=distance)

        # Filter out singletons
        graph_nodes = list(nodes)
        for n in graph_nodes:
            if len(list(self.neighbors(nodes[n]))) == 0:
                self.remove_node(nodes[n])
                del nodes[n]

        return bidict(nodes)

    def create_graph_from_index(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
208
209
        nodes = {}

210
        for dmer in self.index:
Yoann Dufresne's avatar
Yoann Dufresne committed
211
            dgs = list(set(self.index[dmer]))
212
            for d_idx, dg in enumerate(dgs):
213
                # Create a node name
214
                if dg not in nodes:
215
                    nodes[dg] = dg.idx
216

217
                    # Add the node
218
                    self.add_node(nodes[dg])
219
220
221
222
223
224
                    # Add covering barcode edges
                    barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
                    self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
                    self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
                    self.nodes[nodes[dg]]["udg"] = str(dg)

Yoann Dufresne's avatar
Yoann Dufresne committed
225

226
                # Add the edges
Yoann Dufresne's avatar
Yoann Dufresne committed
227
228
229
230
231
                for prev_idx in range(d_idx):
                    prev_dg = dgs[prev_idx]

                    # Add on small distances
                    d = dg.distance_to(prev_dg)
232
                    if d <= min(len(dg.node_set)/2, len(prev_dg.node_set)/2):
Yoann Dufresne's avatar
Yoann Dufresne committed
233
                        self.add_edge(nodes[dg], nodes[prev_dg], distance=d)
Yoann Dufresne's avatar
Yoann Dufresne committed
234

235
        return bidict(nodes)
Yoann Dufresne's avatar
Yoann Dufresne committed
236

237

Yoann Dufresne's avatar
Yoann Dufresne committed
238
239
240
241
242
243
244
245
246
247
    def compute_distances(self):
        for x, y, data in self.edges(data=True):
            dg1 = self.node_by_idx[x]
            dg2 = self.node_by_idx[y]
            if dg1 == dg2:
                continue

            # Distance computing and adding in the dist dicts
            d = dg1.distance_to(dg2)
            data["distance"] = d