d2_graph.py 8.36 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import itertools
Yoann Dufresne's avatar
Yoann Dufresne committed
3
from bidict import bidict
4
import sys
5

6
# from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
Yoann Dufresne's avatar
Yoann Dufresne committed
7
8
9
10
from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
from deconvolution.dgraph.d_graph import Dgraph
from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
Yoann Dufresne's avatar
Yoann Dufresne committed
11
12


13
class D2Graph(nx.Graph):
Yoann Dufresne's avatar
Yoann Dufresne committed
14
    """D2Graph (read it (d-graph)²)"""
15
    def __init__(self, barcode_graph, debug=False, debug_path='.'):
Yoann Dufresne's avatar
Yoann Dufresne committed
16
        super(D2Graph, self).__init__()
Yoann Dufresne's avatar
Yoann Dufresne committed
17
18
19
        self.all_d_graphs = []
        self.d_graphs_per_node = {}
        self.node_by_idx = {}
20
        self.barcode_graph = barcode_graph
21
22
        self.index = None

23
        # Number the edges from original graph
24
        self.barcode_edge_idxs = {}
25
        self.nb_uniq_edge = 0
26
        for idx, edge in enumerate(self.barcode_graph.edges()):
27
28
            if edge == (edge[1], edge[0]):
                self.nb_uniq_edge += 1
29
            if edge in self.barcode_edge_idxs:
30
                print("Edge already present")
31
32
            self.barcode_edge_idxs[edge] = idx
            self.barcode_edge_idxs[(edge[1], edge[0])] = idx
Yoann Dufresne's avatar
Yoann Dufresne committed
33

34
35
36
        self.debug = debug
        self.debug_path = debug_path

37

38
39
40
    """ Redefine subgraph to avoid errors type instantiation errors.
    """
    def subgraph(self, nodes):
Yoann Dufresne's avatar
Yoann Dufresne committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
        nodes = frozenset(nodes)

        G = D2Graph(self.barcode_graph)
        G.barcode_edge_idxs = self.barcode_edge_idxs

        # Add sub-nodes
        for node in nodes:
            G.add_node(node)
            G.nodes[node].update(self.nodes[node])

        # Add edges
        for node1, node2, data in self.edges(data=True):
            if node1 in nodes and node2 in nodes:
                G.add_edge(node1, node2, distance=data["distance"])

        # Node by idx
        G.node_by_idx = self.node_by_idx

        return G
60

Yoann Dufresne's avatar
Yoann Dufresne committed
61
62
63
    def clone(self):
        return self.subgraph(list(self.nodes()))

64

Yoann Dufresne's avatar
Yoann Dufresne committed
65
    def construct_from_barcodes(self, neighbor_threshold=0.25, verbose=True, clique_mode=None, threads=1):
Yoann Dufresne's avatar
Yoann Dufresne committed
66
        # Compute all the d-graphs
67
        if verbose:
68
            print("Computing the unit d-graphs..")
Yoann Dufresne's avatar
Yoann Dufresne committed
69
70
71
72
        dg_factory = None
        if clique_mode == "louvain":
            dg_factory = LouvainDGFactory(self.barcode_graph)
        else:
73
            dg_factory = CliqueDGFactory(self.barcode_graph, debug=self.debug, debug_path=self.debug_path)
74
        self.d_graphs_per_node = dg_factory.generate_all_dgraphs(threads=threads, verbose=True)
75
76
77
        if verbose:
            counts = sum(len(x) for x in self.d_graphs_per_node.values())
            print(f"\t {counts} computed d-graphs")
Yoann Dufresne's avatar
Yoann Dufresne committed
78
79
80
81
82
83
        for d_graphs in self.d_graphs_per_node.values():
            self.all_d_graphs.extend(d_graphs)

        # Number the d_graphs
        for idx, d_graph in enumerate(self.all_d_graphs):
            d_graph.idx = idx
84
            self.node_by_idx[idx] = d_graph
85

86
        if verbose:
87
            print("Compute the graph")
88
        # Create the graph
Yoann Dufresne's avatar
Yoann Dufresne committed
89
        self.bidict_nodes = self.create_graph_from_node_neighborhoods(neighbor_threshold)
Yoann Dufresne's avatar
Yoann Dufresne committed
90
91


Yoann Dufresne's avatar
Yoann Dufresne committed
92
93
94
95
96
97
98
99
    def get_covering_variables(self, udg):
        variables = []
        for e in udg.edges:
            variables.append(self.barcode_edge_idxs[e])

        return frozenset(variables)


Yoann Dufresne's avatar
Yoann Dufresne committed
100
101
102
    def save(self, filename):
        with open(filename, "w") as fp:
            # First line nb_nodes nb_cov_var
103
            fp.write(f"{len(self.all_d_graphs)} {int((len(self.barcode_edge_idxs)+self.nb_uniq_edge)/2)}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
104
105
106

            # Write the edges per d_graph
            for d_graph in self.all_d_graphs:
107
                fp.write(f"{d_graph.idx} {' '.join([str(self.barcode_edge_idxs[e]) for e in d_graph.edges])}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
108
109

            # Write the distances
110
            for x, y, data in self.edges(data=True):
111
                fp.write(f"{x} {y} {data['distance']}\n")
Yoann Dufresne's avatar
Yoann Dufresne committed
112

Yoann Dufresne's avatar
Yoann Dufresne committed
113

114
115
    def load(self, filename):
        # Reload the graph
116
        G = nx.read_gexf(filename)
117
        for node, attrs in G.nodes(data=True):
118
119
            self.add_node(node)
            self.nodes[node].update(attrs)
120
121
        for edge in G.edges(data=True):
            self.add_edge(edge[0], edge[1], distance=edge[2]["distance"])
122
123

        # Extract d-graphs from nx graph
Yoann Dufresne's avatar
Yoann Dufresne committed
124
        # self.node_by_name = {}
125
        self.bidict_nodes = {}
126
127
128
        for idx, node in enumerate(self.nodes(data=True)):
            node, data = node
            dg = Dgraph.load(data["udg"], self.barcode_graph)
129
            self.bidict_nodes[node] = dg
130
131
            self.all_d_graphs.append(dg)
            if dg.idx == -1:
Yoann Dufresne's avatar
Yoann Dufresne committed
132
                dg.idx = int(node)
133
            self.node_by_idx[dg.idx] = dg
Yoann Dufresne's avatar
Yoann Dufresne committed
134
            # self.node_by_name[node] = dg
135
        self.bidict_nodes = bidict(self.bidict_nodes)
136

137

138
    def create_index_from_tuples(self, tuple_size=3, verbose=True):
Yoann Dufresne's avatar
Yoann Dufresne committed
139
        index = {}
140

141
142
143
144
145
146
147
148
        if verbose:
            print("\tIndex d-graphs")
        for lst_idx, dg in enumerate(self.all_d_graphs):
            if verbose:
                sys.stdout.write(f"\r\t{lst_idx+1}/{len(self.all_d_graphs)}")
                sys.stdout.flush()

            nodelist = dg.to_sorted_list()
Yoann Dufresne's avatar
Yoann Dufresne committed
149
150
151
152
153
            if len(nodelist) < tuple_size:
                continue

            # Generate all tuplesize-mers
            for dmer in itertools.combinations(nodelist, tuple_size):
154
155
156
157
158
159
                if dmer not in index:
                    index[dmer] = set()
                index[dmer].add(dg)

        if verbose:
            print()
160
161
162
163

        return index


164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
    def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
        nodes = {}

        # Create the nodes of d2g from udgs
        for dg in self.all_d_graphs:
            nodes[dg] = dg.idx
            self.add_node(nodes[dg])
            # Add covering barcode edges
            barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
            self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
            self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
            self.nodes[nodes[dg]]["udg"] = str(dg)

        # Create the edges from neighbor edges
        for dg in self.all_d_graphs:
            for node in dg.to_node_set():
                if node == dg.center:
                    continue
                entry = frozenset({node})
                if entry in self.d_graphs_per_node:
                    colliding_dgs = self.d_graphs_per_node[entry]
                    for colliding_dg in colliding_dgs:
                        distance = dg.distance_to(colliding_dg)
                        distance_ratio = distance / (len(dg.nodes) + len(colliding_dg.nodes))
                        if distance_ratio <= neighborhood_threshold:
                            self.add_edge(nodes[dg], nodes[colliding_dg], distance=distance)

        # Filter out singletons
        graph_nodes = list(nodes)
        for n in graph_nodes:
            if len(list(self.neighbors(nodes[n]))) == 0:
                self.remove_node(nodes[n])
                del nodes[n]

        return bidict(nodes)

    def create_graph_from_index(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
201
202
        nodes = {}

203
        for dmer in self.index:
Yoann Dufresne's avatar
Yoann Dufresne committed
204
            dgs = list(set(self.index[dmer]))
205
            for d_idx, dg in enumerate(dgs):
206
                # Create a node name
207
                if dg not in nodes:
208
                    nodes[dg] = dg.idx
209

210
                    # Add the node
211
                    self.add_node(nodes[dg])
212
213
214
215
216
217
                    # Add covering barcode edges
                    barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
                    self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
                    self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
                    self.nodes[nodes[dg]]["udg"] = str(dg)

Yoann Dufresne's avatar
Yoann Dufresne committed
218

219
                # Add the edges
Yoann Dufresne's avatar
Yoann Dufresne committed
220
221
222
223
224
                for prev_idx in range(d_idx):
                    prev_dg = dgs[prev_idx]

                    # Add on small distances
                    d = dg.distance_to(prev_dg)
225
                    if d <= min(len(dg.node_set)/2, len(prev_dg.node_set)/2):
Yoann Dufresne's avatar
Yoann Dufresne committed
226
                        self.add_edge(nodes[dg], nodes[prev_dg], distance=d)
Yoann Dufresne's avatar
Yoann Dufresne committed
227

228
        return bidict(nodes)
Yoann Dufresne's avatar
Yoann Dufresne committed
229

230

Yoann Dufresne's avatar
Yoann Dufresne committed
231
232
233
234
235
236
237
238
239
240
    def compute_distances(self):
        for x, y, data in self.edges(data=True):
            dg1 = self.node_by_idx[x]
            dg2 = self.node_by_idx[y]
            if dg1 == dg2:
                continue

            # Distance computing and adding in the dist dicts
            d = dg1.distance_to(dg2)
            data["distance"] = d