d2_graph.py 8.85 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import itertools
Yoann Dufresne's avatar
Yoann Dufresne committed
3
from bidict import bidict
4
import sys
5

6
# from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
Yoann Dufresne's avatar
Yoann Dufresne committed
7
8
9
10
from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
from deconvolution.dgraph.d_graph import Dgraph
from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
Yoann Dufresne's avatar
Yoann Dufresne committed
11
12


13
class D2Graph(nx.Graph):
Yoann Dufresne's avatar
Yoann Dufresne committed
14
    """D2Graph (read it (d-graph)²)"""
15
    def __init__(self, barcode_graph, debug=False, debug_path='.'):
Yoann Dufresne's avatar
Yoann Dufresne committed
16
        super(D2Graph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
17
18
19
        self.all_d_graphs = []
        self.d_graphs_per_node = {}
        self.node_by_idx = {}
20
        self.barcode_graph = barcode_graph
21
22
        self.index = None

Yoann Dufresne's avatar
Yoann Dufresne committed
23
        self.variables_per_lcp = {}
Yoann Dufresne's avatar
Yoann Dufresne committed
24

25
        # Number the edges from original graph
26
        self.barcode_edge_idxs = {}
27
        self.nb_uniq_edge = 0
28
        for idx, edge in enumerate(self.barcode_graph.edges()):
29
30
            if edge == (edge[1], edge[0]):
                self.nb_uniq_edge += 1
31
            if edge in self.barcode_edge_idxs:
32
                print("Edge already present")
33
34
            self.barcode_edge_idxs[edge] = idx
            self.barcode_edge_idxs[(edge[1], edge[0])] = idx
Yoann Dufresne's avatar
Yoann Dufresne committed
35

36
37
38
        self.debug = debug
        self.debug_path = debug_path

39

40
41
42
    """ Redefine subgraph to avoid errors type instantiation errors.
    """
    def subgraph(self, nodes):
Yoann Dufresne's avatar
Yoann Dufresne committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
        nodes = frozenset(nodes)

        G = D2Graph(self.barcode_graph)
        G.barcode_edge_idxs = self.barcode_edge_idxs

        # Add sub-nodes
        for node in nodes:
            G.add_node(node)
            G.nodes[node].update(self.nodes[node])

        # Add edges
        for node1, node2, data in self.edges(data=True):
            if node1 in nodes and node2 in nodes:
                G.add_edge(node1, node2, distance=data["distance"])

        # Node by idx
        G.node_by_idx = self.node_by_idx

        return G
62

Yoann Dufresne's avatar
Yoann Dufresne committed
63
64
65
    def clone(self):
        return self.subgraph(list(self.nodes()))

66

67
    def construct_from_barcodes(self, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1):
Yoann Dufresne's avatar
Yoann Dufresne committed
68
        # Compute all the d-graphs
69
        if verbose:
70
            print("Computing the unit d-graphs..")
Yoann Dufresne's avatar
Yoann Dufresne committed
71
72
73
74
        dg_factory = None
        if clique_mode == "louvain":
            dg_factory = LouvainDGFactory(self.barcode_graph)
        else:
75
76
            dg_factory = CliqueDGFactory(self.barcode_graph, min_size_clique=min_size_clique, debug=self.debug, debug_path=self.debug_path)
        self.d_graphs_per_node = dg_factory.generate_all_dgraphs(threads=threads, verbose=verbose)
77
78
79
        if verbose:
            counts = sum(len(x) for x in self.d_graphs_per_node.values())
            print(f"\t {counts} computed d-graphs")
Yoann Dufresne's avatar
Yoann Dufresne committed
80
81
82
83
84
85
        for d_graphs in self.d_graphs_per_node.values():
            self.all_d_graphs.extend(d_graphs)

        # Number the d_graphs
        for idx, d_graph in enumerate(self.all_d_graphs):
            d_graph.idx = idx
86
            self.node_by_idx[idx] = d_graph
87

88
        if verbose:
89
            print("Compute the graph")
90
        # Create the graph
Yoann Dufresne's avatar
Yoann Dufresne committed
91
        self.bidict_nodes = self.create_graph_from_node_neighborhoods(neighbor_threshold)
Yoann Dufresne's avatar
Yoann Dufresne committed
92
93


Yoann Dufresne's avatar
Yoann Dufresne committed
94
    def get_lcp(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
95
96
97
98
99
        if type(obj) == str:
            obj = int(obj)
        if type(obj) == int:
            obj = self.node_by_idx[obj]
        return obj
Yoann Dufresne's avatar
Yoann Dufresne committed
100

Yoann Dufresne's avatar
Yoann Dufresne committed
101
    def get_covering_variables(self, obj):
Yoann Dufresne's avatar
Yoann Dufresne committed
102
        lcp = self.get_lcp(obj)
Yoann Dufresne's avatar
Yoann Dufresne committed
103
104
105
106
107
        if lcp not in self.variables_per_lcp:
            variables = []
            for e in lcp.edges:
                variables.append(self.barcode_edge_idxs[e])
            self.variables_per_lcp[lcp] = variables
Yoann Dufresne's avatar
Yoann Dufresne committed
108

Yoann Dufresne's avatar
Yoann Dufresne committed
109
        return self.variables_per_lcp[lcp]
Yoann Dufresne's avatar
Yoann Dufresne committed
110
111


Yoann Dufresne's avatar
Yoann Dufresne committed
112
113
114
    def save(self, filename):
        with open(filename, "w") as fp:
            # First line nb_nodes nb_cov_var
115
            fp.write(f"{len(self.all_d_graphs)} {int((len(self.barcode_edge_idxs)+self.nb_uniq_edge)/2)}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
116
117
118

            # Write the edges per d_graph
            for d_graph in self.all_d_graphs:
119
                fp.write(f"{d_graph.idx} {' '.join([str(self.barcode_edge_idxs[e]) for e in d_graph.edges])}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
120
121

            # Write the distances
122
            for x, y, data in self.edges(data=True):
123
                fp.write(f"{x} {y} {data['distance']}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
124

Yoann Dufresne's avatar
Yoann Dufresne committed
125

126
127
    def load(self, filename):
        # Reload the graph
128
        G = nx.read_gexf(filename)
129
        for node, attrs in G.nodes(data=True):
130
131
            self.add_node(node)
            self.nodes[node].update(attrs)
132
133
        for edge in G.edges(data=True):
            self.add_edge(edge[0], edge[1], distance=edge[2]["distance"])
134
135

        # Extract d-graphs from nx graph
Yoann Dufresne's avatar
Yoann Dufresne committed
136
        # self.node_by_name = {}
137
        self.bidict_nodes = {}
138
139
140
        for idx, node in enumerate(self.nodes(data=True)):
            node, data = node
            dg = Dgraph.load(data["udg"], self.barcode_graph)
141
            self.bidict_nodes[node] = dg
142
143
            self.all_d_graphs.append(dg)
            if dg.idx == -1:
Yoann Dufresne's avatar
Yoann Dufresne committed
144
                dg.idx = int(node)
145
            self.node_by_idx[dg.idx] = dg
Yoann Dufresne's avatar
Yoann Dufresne committed
146
            # self.node_by_name[node] = dg
147
        self.bidict_nodes = bidict(self.bidict_nodes)
148

149

150
    def create_index_from_tuples(self, tuple_size=3, verbose=True):
Yoann Dufresne's avatar
Yoann Dufresne committed
151
        index = {}
152

153
154
155
156
157
158
159
160
        if verbose:
            print("\tIndex d-graphs")
        for lst_idx, dg in enumerate(self.all_d_graphs):
            if verbose:
                sys.stdout.write(f"\r\t{lst_idx+1}/{len(self.all_d_graphs)}")
                sys.stdout.flush()

            nodelist = dg.to_sorted_list()
Yoann Dufresne's avatar
Yoann Dufresne committed
161
162
163
164
165
            if len(nodelist) < tuple_size:
                continue

            # Generate all tuplesize-mers
            for dmer in itertools.combinations(nodelist, tuple_size):
166
167
168
169
170
171
                if dmer not in index:
                    index[dmer] = set()
                index[dmer].add(dg)

        if verbose:
            print()
172
173
174
175

        return index


176
177
178
179
180
181
182
183
184
185
186
187
    def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
        nodes = {}

        # Create the nodes of d2g from udgs
        for dg in self.all_d_graphs:
            nodes[dg] = dg.idx
            self.add_node(nodes[dg])
            # Add covering barcode edges
            barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
            self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
            self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
            self.nodes[nodes[dg]]["udg"] = str(dg)
188
            self.nodes[nodes[dg]]["central_node_barcode"] = str(dg).split(']')[0]+']'
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

        # Create the edges from neighbor edges
        for dg in self.all_d_graphs:
            for node in dg.to_node_set():
                if node == dg.center:
                    continue
                entry = frozenset({node})
                if entry in self.d_graphs_per_node:
                    colliding_dgs = self.d_graphs_per_node[entry]
                    for colliding_dg in colliding_dgs:
                        distance = dg.distance_to(colliding_dg)
                        distance_ratio = distance / (len(dg.nodes) + len(colliding_dg.nodes))
                        if distance_ratio <= neighborhood_threshold:
                            self.add_edge(nodes[dg], nodes[colliding_dg], distance=distance)

        # Filter out singletons
        graph_nodes = list(nodes)
        for n in graph_nodes:
            if len(list(self.neighbors(nodes[n]))) == 0:
                self.remove_node(nodes[n])
                del nodes[n]

        return bidict(nodes)

    def create_graph_from_index(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
214
215
        nodes = {}

216
        for dmer in self.index:
Yoann Dufresne's avatar
Yoann Dufresne committed
217
            dgs = list(set(self.index[dmer]))
218
            for d_idx, dg in enumerate(dgs):
219
                # Create a node name
220
                if dg not in nodes:
221
                    nodes[dg] = dg.idx
222

223
                    # Add the node
224
                    self.add_node(nodes[dg])
225
226
227
228
229
230
                    # Add covering barcode edges
                    barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
                    self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
                    self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
                    self.nodes[nodes[dg]]["udg"] = str(dg)

Yoann Dufresne's avatar
Yoann Dufresne committed
231

232
                # Add the edges
Yoann Dufresne's avatar
Yoann Dufresne committed
233
234
235
236
237
                for prev_idx in range(d_idx):
                    prev_dg = dgs[prev_idx]

                    # Add on small distances
                    d = dg.distance_to(prev_dg)
238
                    if d <= min(len(dg.node_set)/2, len(prev_dg.node_set)/2):
Yoann Dufresne's avatar
Yoann Dufresne committed
239
                        self.add_edge(nodes[dg], nodes[prev_dg], distance=d)
Yoann Dufresne's avatar
Yoann Dufresne committed
240

241
        return bidict(nodes)
Yoann Dufresne's avatar
Yoann Dufresne committed
242

243

Yoann Dufresne's avatar
Yoann Dufresne committed
244
245
246
247
248
249
250
251
252
253
    def compute_distances(self):
        for x, y, data in self.edges(data=True):
            dg1 = self.node_by_idx[x]
            dg2 = self.node_by_idx[y]
            if dg1 == dg2:
                continue

            # Distance computing and adding in the dist dicts
            d = dg1.distance_to(dg2)
            data["distance"] = d