d2_graph.py 6.62 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import itertools
Yoann Dufresne's avatar
Yoann Dufresne committed
3
from bidict import bidict
4

5
from d_graph import compute_all_max_d_graphs, filter_dominated, list_domination_filter
Yoann Dufresne's avatar
Yoann Dufresne committed
6
7
8
9


class D2Graph(object):
    """D2Graph (read it (d-graph)²)"""
10
    def __init__(self, graph, index_size=3, verbose=True, debug=False):
Yoann Dufresne's avatar
Yoann Dufresne committed
11
12
13
14
        super(D2Graph, self).__init__()
        self.graph = graph

        # Compute all the d-graphs
15
16
        if verbose:
            print("Compute the unit d-graphs")
Yoann Dufresne's avatar
Yoann Dufresne committed
17
        self.d_graphs_per_node = compute_all_max_d_graphs(self.graph, debug=debug)
18
        self.d_graphs_per_node = filter_dominated(self.d_graphs_per_node)
Yoann Dufresne's avatar
Yoann Dufresne committed
19
20
21
22
23
24
        self.all_d_graphs = []
        for d_graphs in self.d_graphs_per_node.values():
            self.all_d_graphs.extend(d_graphs)

        # Name the d-graphs
        # Number the d_graphs
25
        self.node_by_idx = {}
Yoann Dufresne's avatar
Yoann Dufresne committed
26
27
        for idx, d_graph in enumerate(self.all_d_graphs):
            d_graph.idx = idx
28
            self.node_by_idx[idx] = d_graph
Yoann Dufresne's avatar
Yoann Dufresne committed
29
30
31
32
33
34
35

        # Number the edges from original graph
        self.edge_idxs = {}
        self.nb_uniq_edge = 0
        for idx, edge in enumerate(self.graph.edges()):
            if edge == (edge[1], edge[0]):
                self.nb_uniq_edge += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
36
37
            if edge in self.edge_idxs:
                print("Edge already present")
Yoann Dufresne's avatar
Yoann Dufresne committed
38
39
            self.edge_idxs[edge] = idx
            self.edge_idxs[(edge[1], edge[0])] = idx
Yoann Dufresne's avatar
Yoann Dufresne committed
40
41
        
        # Index all the d-graphes
42
43
        if verbose:
            print("Compute the dmer index")
Yoann Dufresne's avatar
Yoann Dufresne committed
44
        self.index = self.create_index_from_tuples(index_size)
45
        self.filter_dominated_in_index()
46
47
48
49
50
51
        # Compute node distances for pair of dgraphs that share at least 1 dmer.
        if verbose:
            print("Compute a subset of distances")
        self.distances = self.compute_distances()

        # Create the graph
Yoann Dufresne's avatar
Yoann Dufresne committed
52
        self.nx_graph, self.nodes = self.to_nx_graph()
Yoann Dufresne's avatar
Yoann Dufresne committed
53
54


Yoann Dufresne's avatar
Yoann Dufresne committed
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    def save(self, filename):
        with open(filename, "w") as fp:
            # First line nb_nodes nb_cov_var
            fp.write(f"{len(self.all_d_graphs)} {int((len(self.edge_idxs)+self.nb_uniq_edge)/2)}\n")

            # Write the edges per d_graph
            for d_graph in self.all_d_graphs:
                fp.write(f"{d_graph.idx} {' '.join([str(self.edge_idxs[e]) for e in d_graph.edges])}\n")

            # Write the distances
            for d_graph in self.all_d_graphs:
                for neighbor_idx, dist in self.distances[d_graph.idx].items():
                    fp.write(f"{d_graph.idx} {neighbor_idx} {dist}\n")

Yoann Dufresne's avatar
Yoann Dufresne committed
69
70

    def create_index_from_tuples(self, tuple_size=3):
Yoann Dufresne's avatar
Yoann Dufresne committed
71
        index = {}
72

73
        perfect = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
74
75
76
77
78
79
80
81
82
83
84
85
        for dg in self.all_d_graphs:
            nodelist = dg.to_list()
            nodelist.sort()
            if len(nodelist) < tuple_size:
                continue

            # Generate all tuplesize-mers
            for dmer in itertools.combinations(nodelist, tuple_size):
                if not dmer in index:
                    index[dmer] = [dg]
                else:
                    index[dmer].append(dg)
86
87
88
89

        return index


90
    def compute_distances(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
91
        distances = {dg.idx:{} for dg in self.all_d_graphs}
92
93
94
95
96
97
98
99
100
101

        for dmer, dgraphs in self.index.items():
            for idx1, dg1 in enumerate(dgraphs):
                for idx2 in range(idx1+1, len(dgraphs)):
                    dg2 = dgraphs[idx2]
                    if dg1 == dg2:
                        continue

                    # Distance computing and adding in the dist dicts
                    d = dg1.distance_to(dg2)
Yoann Dufresne's avatar
Yoann Dufresne committed
102
103
                    distances[dg1.idx][dg2.idx] = d
                    distances[dg2.idx][dg1.idx] = d
104
105
106
107

        return distances


108
109
110
    def create_index_ordered(self):
        index = {}

111
        perfect = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
112
113
        for node in self.d_graphs_per_node:
            for dg in self.d_graphs_per_node[node]:
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
                lst = dg.to_ordered_lists()
                # Generate all dmers without the first node
                # pull all the values 
                concat = [el for l in lst[1:] for el in l]
                # generate dmers
                for idx in range(len(lst[0])):
                    dmer = frozenset(concat + lst[0][:idx] + lst[0][idx+1:])
                    if not dmer in index:
                        index[dmer] = [dg]
                    else:
                        index[dmer].append(dg)

                # Generate all dmers without the last node
                # pull all the values 
                concat = [el for l in lst[:-1] for el in l]
                # generate dmers
                for idx in range(len(lst[-1])):
                    dmer = frozenset(concat + lst[-1][:idx] + lst[-1][idx+1:])
                    if not dmer in index:
                        index[dmer] = [dg]
                    else:
                        index[dmer].append(dg)

Yoann Dufresne's avatar
Yoann Dufresne committed
137
138
139
        return index


140
    def to_nx_graph(self):
Yoann Dufresne's avatar
Yoann Dufresne committed
141
        # next_idx = 0
Yoann Dufresne's avatar
Yoann Dufresne committed
142
143
144
        nodes = {}
        G = nx.Graph()

145
146
147
148
        for dmer in self.index:
            for d_idx, dg in enumerate(self.index[dmer]):
                # Create a node name
                if not dg in nodes:
Yoann Dufresne's avatar
Yoann Dufresne committed
149
150
                    nodes[dg] = str(dg)
                    # next_idx += 1
Yoann Dufresne's avatar
Yoann Dufresne committed
151
                
152
153
                    # Add the node
                    G.add_node(nodes[dg])
Yoann Dufresne's avatar
Yoann Dufresne committed
154

155
156
157
                # Add the edges
                for prev_node in self.index[dmer][:d_idx]:
                    G.add_edge(nodes[dg], nodes[prev_node])
Yoann Dufresne's avatar
Yoann Dufresne committed
158

Yoann Dufresne's avatar
Yoann Dufresne committed
159
        return G, bidict(nodes)
Yoann Dufresne's avatar
Yoann Dufresne committed
160

161

162
163
164
165
166
167
    def filter_dominated_in_index(self):
        to_remove = []

        # Find dominated
        for dmer, dg_list in self.index.items():
            undominated = list_domination_filter(dg_list)
168
169
170
171
            # if len(undominated) > 1:
            #     print(dmer)
            #     print("\n".join([str(x) for x in undominated]))
            #     print()
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

            # Register dominated
            if len(dg_list) != len(undominated):
                for dg in dg_list:
                    if not dg in undominated:
                        to_remove.append(dg)

            self.index[dmer] = undominated

        to_remove = frozenset(to_remove)
        # Remove dominated in global list
        for r_dg in to_remove:
            self.all_d_graphs.remove(r_dg)
            self.d_graphs_per_node[r_dg.center].remove(r_dg)

        # Remove dominated in index
        removable_dmers = []
189
        for dmer in self.index:
190
            for r_dg in to_remove:
191
192
193
                if r_dg in self.index[dmer]:
                    self.index[dmer] = list(filter(lambda x: x!=r_dg, self.index[dmer]))
            if len(self.index[dmer]) == 0:
194
195
196
197
198
199
200
                removable_dmers.append(dmer)

        # Remove empty dmers
        for dmer in removable_dmers:
            del self.index[dmer]


Yoann Dufresne's avatar
Yoann Dufresne committed
201