diff --git a/deconvolution/d2graph/d2_graph.py b/deconvolution/d2graph/d2_graph.py index 45d92b6f9f5b76122506ad2f3d22b7759fca43c7..6eb32bb5f19122b0e182ca25f19852031e263aaa 100644 --- a/deconvolution/d2graph/d2_graph.py +++ b/deconvolution/d2graph/d2_graph.py @@ -12,26 +12,18 @@ from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory class D2Graph(nx.Graph): """D2Graph (read it (d-graph)²)""" - def __init__(self, barcode_graph, debug=False, debug_path='.'): + def __init__(self, debug=False, debug_path='.'): super(D2Graph, self).__init__() self.all_d_graphs = [] self.d_graphs_per_node = {} self.node_by_idx = {} - self.barcode_graph = barcode_graph + self.barcode_graph = None self.index = None self.variables_per_lcp = {} - # Number the edges from original graph self.barcode_edge_idxs = {} self.nb_uniq_edge = 0 - for idx, edge in enumerate(self.barcode_graph.edges()): - if edge == (edge[1], edge[0]): - self.nb_uniq_edge += 1 - if edge in self.barcode_edge_idxs: - print("Edge already present") - self.barcode_edge_idxs[edge] = idx - self.barcode_edge_idxs[(edge[1], edge[0])] = idx self.debug = debug self.debug_path = debug_path @@ -64,7 +56,19 @@ class D2Graph(nx.Graph): return self.subgraph(list(self.nodes())) - def construct_from_barcodes(self, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1): + def construct_from_barcodes(self, barcode_graph, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1): + self.barcode_graph = barcode_graph + + # Number the edges from original graph + for idx, edge in enumerate(self.barcode_graph.edges()): + if edge == (edge[1], edge[0]): + self.nb_uniq_edge += 1 + if edge in self.barcode_edge_idxs: + print("Edge already present") + self.barcode_edge_idxs[edge] = idx + self.barcode_edge_idxs[(edge[1], edge[0])] = idx + self.barcode_graph[edge[0]][edge[1]]["uniq_idx"] = idx + # Compute all the d-graphs if verbose: print("Computing the unit d-graphs..") @@ -181,7 +185,7 @@ class D2Graph(nx.Graph): nodes[dg] = dg.idx self.add_node(nodes[dg]) # Add covering barcode edges - barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges]) + barcode_edges = " ".join([str(x) for x in dg.edges]) self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}" self.nodes[nodes[dg]]["udg"] = str(dg) diff --git a/deconvolution/dgraph/d_graph.py b/deconvolution/dgraph/d_graph.py index 496b9474f9a7924a85e6be3a0629140515563d6f..1d49a3ccdf403b453e39a1bb0947d7fc824bd14c 100644 --- a/deconvolution/dgraph/d_graph.py +++ b/deconvolution/dgraph/d_graph.py @@ -11,60 +11,57 @@ class Dgraph(object): self.center = center self.score = 0 self.halves = [[], []] - self.connexity = [[], []] + self.connexity = [{}, {}] self.nodes = [self.center] self.node_set = set(self.nodes) - self.edges = [] self.ordered_list = None self.sorted_list = None + self.edges = set() - self.marked = False - - """ Static method to load a dgraph from a text - @param text the saved d-graph - @param barcode_graph Barcode graph from which the d-graph is extracted - @return a new d-graph object corresponding to the test - """ - def load(text, barcode_graph): + @staticmethod + def load(lcp_txt, variables): + """ Static method to load a dgraph from a text + :param lcp_txt: the saved d-graph + :return: a new d-graph object corresponding to the test + """ # basic split - text = text.replace(']', '') - _, head, h1, h2 = text.split('[') + lcp_txt = lcp_txt.replace(']', '') + _, head, h1, h2 = lcp_txt.split('[') # Head parsing center = head.replace(' ', '') - if center not in barcode_graph: - center = int(center) dg = Dgraph(center) # Reload halves h1 = [x for x in h1.split(',')] - h1 = [int(x) if x not in barcode_graph else x for x in h1] h2 = [x for x in h2.split(',')] - h2 = [int(x) if x not in barcode_graph else x for x in h2] - dg.put_halves(h1, h2, barcode_graph) + + dg.halves[0] = h1 + dg.node_set.update(h1) + dg.nodes.extend(h1) + dg.halves[1] = h2 + dg.node_set.update(h2) + dg.nodes.extend(h2) return dg - """ Compute the d-graph quality (score) according to the connectivity between the two halves. - @param h1 First half of the d-graph - @param h2 Second half of the d-graph - @param graph The connectivity graph - """ def put_halves(self, h1, h2, graph): + """ Compute the d-graph quality (score) according to the connectivity between the two halves. + :param h1: First half of the d-graph + :param h2: Second half of the d-graph + :param graph: The barcode graph + """ self.score = 0 + # First half self.halves[0] = h1 - for node in h1: - self.node_set.add(node) - self.nodes.append(node) + self.node_set.update(h1) + self.nodes.extend(h1) + self.connexity[0] = {key: 0 for key in h1} + # second half self.halves[1] = h2 - for node in h2: - self.node_set.add(node) - self.nodes.append(node) - - # self.nodes = sorted([self.center] + self.halves[0] + self.halves[1]) - self.connexity[0] = {key: 0 for key in self.halves[0]} - self.connexity[1] = {key: 0 for key in self.halves[1]} - self.edges = [] + self.node_set.update(h2) + self.nodes.extend(h2) + self.connexity[1] = {key: 0 for key in h2} # Compute link arities for node1 in self.halves[0]: @@ -76,19 +73,10 @@ class Dgraph(object): self.connexity[0][node1] += 1 self.connexity[1][node2] += 1 - # Compute links from the center to the other nodes - for idx, node1 in enumerate(self.nodes): - for node2 in self.nodes[idx+1:]: - if graph.has_edge(node1, node2): - if node1 < node2: - self.edges.append((node1, node2)) - else: - self.edges.append((node2, node1)) - - # Sort the halves by descending connexity - connex = self.connexity - self.halves[0].sort(reverse=True, key=lambda v: connex[0][v]) - self.halves[1].sort(reverse=True, key=lambda v: connex[1][v]) + # Compute covering variable + subgraph = graph.subgraph(self.nodes) + for _, _, id in subgraph.edges.data("uniq_idx"): + self.edges.add(id) def get_link_divergence(self): return int(abs(self.score - self.get_optimal_score())) @@ -104,7 +92,7 @@ class Dgraph(object): def to_ordered_lists(self): if self.ordered_list is None: - hands = [[],[]] + hands = [[], []] for idx in range(2): prev_connectivity = -1 for node in self.halves[idx]: diff --git a/deconvolution/main/to_d2_graph.py b/deconvolution/main/to_d2_graph.py index c7791c903c69effbfbe9e5f9552469ecbc85377b..cb308524c11551dc0e8ef450b81a32677fcc84d0 100755 --- a/deconvolution/main/to_d2_graph.py +++ b/deconvolution/main/to_d2_graph.py @@ -8,18 +8,21 @@ from deconvolution.d2graph import d2_graph as d2 def parse_arguments(): - parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.') + parser = argparse.ArgumentParser(description='Transform a barcode graph into a lcp graph. The program dig for a set of lcps and then merge them into a lcp graph.') parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.') - parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.") + parser.add_argument('--output_prefix', '-o', default="", help="Output file prefix.") parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation') parser.add_argument('--debug', '-d', action='store_true', help="Debug") parser.add_argument('--verbose', '-v', action='store_true', help="Verbose") parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph') parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)") parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques") - parser.add_argument('--comtest', '-k', action='store_true', help="Enable [placeholder] community detection algorithm instead of max-cliques") args = parser.parse_args() + + if args.output_prefix == "": + args.output_prefix = ".".join(args.barcode_graph.split(".")[:-1]) + "_lcpg" + return args def main(): @@ -28,26 +31,10 @@ def main(): # debug = args.debug filename = args.barcode_graph - - def dprint(s): - from datetime import datetime - t = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - print(t, s) - - dprint("loading barcode graph") - if filename.endswith('.gexf'): - G = nx.read_gexf(filename) - elif filename.endswith('.graphml'): - G = nx.read_graphml(filename) - else: - print("Input file must be gexf or graphml formatted", file=sys.stderr) - exit(1) - dprint("barcode graph loaded") + barcode_graph = nx.read_gexf(filename) if args.louvain: clique_mode = "louvain" - elif args.comtest: - clique_mode = "testing" else: clique_mode = None @@ -62,15 +49,15 @@ def main(): shutil.rmtree(debug_path) os.mkdir(debug_path) - # Index size must be changed for general purpose. 8 is good for d=5 - dprint("creating D2graph object") - d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path) - dprint("D2 graph object created") - dprint("constructing d2 graph from barcode graph") - d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads, verbose=args.verbose) - dprint("[debug] d2 graph constructed") - - # d2g.save(f"{args.output_prefix}.tsv") + d2g = d2.D2Graph(debug=debug, debug_path=debug_path) + d2g.construct_from_barcodes( + barcode_graph, + neighbor_threshold=args.edge_divergence_threshold, + clique_mode=clique_mode, + threads=args.threads, + verbose=args.verbose + ) + nx.write_gexf(d2g, f"{args.output_prefix}.gexf")