Commit c6ef39a8 authored by Yoann Dufresne's avatar Yoann Dufresne

remove dependance to barcode graph in lcp graph loading

parent cd20dda3
...@@ -12,26 +12,18 @@ from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory ...@@ -12,26 +12,18 @@ from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
class D2Graph(nx.Graph): class D2Graph(nx.Graph):
"""D2Graph (read it (d-graph)²)""" """D2Graph (read it (d-graph)²)"""
def __init__(self, barcode_graph, debug=False, debug_path='.'): def __init__(self, debug=False, debug_path='.'):
super(D2Graph, self).__init__() super(D2Graph, self).__init__()
self.all_d_graphs = [] self.all_d_graphs = []
self.d_graphs_per_node = {} self.d_graphs_per_node = {}
self.node_by_idx = {} self.node_by_idx = {}
self.barcode_graph = barcode_graph self.barcode_graph = None
self.index = None self.index = None
self.variables_per_lcp = {} self.variables_per_lcp = {}
# Number the edges from original graph
self.barcode_edge_idxs = {} self.barcode_edge_idxs = {}
self.nb_uniq_edge = 0 self.nb_uniq_edge = 0
for idx, edge in enumerate(self.barcode_graph.edges()):
if edge == (edge[1], edge[0]):
self.nb_uniq_edge += 1
if edge in self.barcode_edge_idxs:
print("Edge already present")
self.barcode_edge_idxs[edge] = idx
self.barcode_edge_idxs[(edge[1], edge[0])] = idx
self.debug = debug self.debug = debug
self.debug_path = debug_path self.debug_path = debug_path
...@@ -64,7 +56,19 @@ class D2Graph(nx.Graph): ...@@ -64,7 +56,19 @@ class D2Graph(nx.Graph):
return self.subgraph(list(self.nodes())) return self.subgraph(list(self.nodes()))
def construct_from_barcodes(self, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1): def construct_from_barcodes(self, barcode_graph, neighbor_threshold=0.25, min_size_clique=4, verbose=False, clique_mode=None, threads=1):
self.barcode_graph = barcode_graph
# Number the edges from original graph
for idx, edge in enumerate(self.barcode_graph.edges()):
if edge == (edge[1], edge[0]):
self.nb_uniq_edge += 1
if edge in self.barcode_edge_idxs:
print("Edge already present")
self.barcode_edge_idxs[edge] = idx
self.barcode_edge_idxs[(edge[1], edge[0])] = idx
self.barcode_graph[edge[0]][edge[1]]["uniq_idx"] = idx
# Compute all the d-graphs # Compute all the d-graphs
if verbose: if verbose:
print("Computing the unit d-graphs..") print("Computing the unit d-graphs..")
...@@ -181,7 +185,7 @@ class D2Graph(nx.Graph): ...@@ -181,7 +185,7 @@ class D2Graph(nx.Graph):
nodes[dg] = dg.idx nodes[dg] = dg.idx
self.add_node(nodes[dg]) self.add_node(nodes[dg])
# Add covering barcode edges # Add covering barcode edges
barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges]) barcode_edges = " ".join([str(x) for x in dg.edges])
self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}" self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
self.nodes[nodes[dg]]["udg"] = str(dg) self.nodes[nodes[dg]]["udg"] = str(dg)
......
...@@ -11,60 +11,57 @@ class Dgraph(object): ...@@ -11,60 +11,57 @@ class Dgraph(object):
self.center = center self.center = center
self.score = 0 self.score = 0
self.halves = [[], []] self.halves = [[], []]
self.connexity = [[], []] self.connexity = [{}, {}]
self.nodes = [self.center] self.nodes = [self.center]
self.node_set = set(self.nodes) self.node_set = set(self.nodes)
self.edges = []
self.ordered_list = None self.ordered_list = None
self.sorted_list = None self.sorted_list = None
self.edges = set()
self.marked = False @staticmethod
def load(lcp_txt, variables):
""" Static method to load a dgraph from a text """ Static method to load a dgraph from a text
@param text the saved d-graph :param lcp_txt: the saved d-graph
@param barcode_graph Barcode graph from which the d-graph is extracted :return: a new d-graph object corresponding to the test
@return a new d-graph object corresponding to the test """
"""
def load(text, barcode_graph):
# basic split # basic split
text = text.replace(']', '') lcp_txt = lcp_txt.replace(']', '')
_, head, h1, h2 = text.split('[') _, head, h1, h2 = lcp_txt.split('[')
# Head parsing # Head parsing
center = head.replace(' ', '') center = head.replace(' ', '')
if center not in barcode_graph:
center = int(center)
dg = Dgraph(center) dg = Dgraph(center)
# Reload halves # Reload halves
h1 = [x for x in h1.split(',')] h1 = [x for x in h1.split(',')]
h1 = [int(x) if x not in barcode_graph else x for x in h1]
h2 = [x for x in h2.split(',')] h2 = [x for x in h2.split(',')]
h2 = [int(x) if x not in barcode_graph else x for x in h2]
dg.put_halves(h1, h2, barcode_graph) dg.halves[0] = h1
dg.node_set.update(h1)
dg.nodes.extend(h1)
dg.halves[1] = h2
dg.node_set.update(h2)
dg.nodes.extend(h2)
return dg return dg
""" Compute the d-graph quality (score) according to the connectivity between the two halves.
@param h1 First half of the d-graph
@param h2 Second half of the d-graph
@param graph The connectivity graph
"""
def put_halves(self, h1, h2, graph): def put_halves(self, h1, h2, graph):
""" Compute the d-graph quality (score) according to the connectivity between the two halves.
:param h1: First half of the d-graph
:param h2: Second half of the d-graph
:param graph: The barcode graph
"""
self.score = 0 self.score = 0
# First half
self.halves[0] = h1 self.halves[0] = h1
for node in h1: self.node_set.update(h1)
self.node_set.add(node) self.nodes.extend(h1)
self.nodes.append(node) self.connexity[0] = {key: 0 for key in h1}
# second half
self.halves[1] = h2 self.halves[1] = h2
for node in h2: self.node_set.update(h2)
self.node_set.add(node) self.nodes.extend(h2)
self.nodes.append(node) self.connexity[1] = {key: 0 for key in h2}
# self.nodes = sorted([self.center] + self.halves[0] + self.halves[1])
self.connexity[0] = {key: 0 for key in self.halves[0]}
self.connexity[1] = {key: 0 for key in self.halves[1]}
self.edges = []
# Compute link arities # Compute link arities
for node1 in self.halves[0]: for node1 in self.halves[0]:
...@@ -76,19 +73,10 @@ class Dgraph(object): ...@@ -76,19 +73,10 @@ class Dgraph(object):
self.connexity[0][node1] += 1 self.connexity[0][node1] += 1
self.connexity[1][node2] += 1 self.connexity[1][node2] += 1
# Compute links from the center to the other nodes # Compute covering variable
for idx, node1 in enumerate(self.nodes): subgraph = graph.subgraph(self.nodes)
for node2 in self.nodes[idx+1:]: for _, _, id in subgraph.edges.data("uniq_idx"):
if graph.has_edge(node1, node2): self.edges.add(id)
if node1 < node2:
self.edges.append((node1, node2))
else:
self.edges.append((node2, node1))
# Sort the halves by descending connexity
connex = self.connexity
self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
def get_link_divergence(self): def get_link_divergence(self):
return int(abs(self.score - self.get_optimal_score())) return int(abs(self.score - self.get_optimal_score()))
...@@ -104,7 +92,7 @@ class Dgraph(object): ...@@ -104,7 +92,7 @@ class Dgraph(object):
def to_ordered_lists(self): def to_ordered_lists(self):
if self.ordered_list is None: if self.ordered_list is None:
hands = [[],[]] hands = [[], []]
for idx in range(2): for idx in range(2):
prev_connectivity = -1 prev_connectivity = -1
for node in self.halves[idx]: for node in self.halves[idx]:
......
...@@ -8,18 +8,21 @@ from deconvolution.d2graph import d2_graph as d2 ...@@ -8,18 +8,21 @@ from deconvolution.d2graph import d2_graph as d2
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.') parser = argparse.ArgumentParser(description='Transform a barcode graph into a lcp graph. The program dig for a set of lcps and then merge them into a lcp graph.')
parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.') parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.')
parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.") parser.add_argument('--output_prefix', '-o', default="", help="Output file prefix.")
parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation') parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
parser.add_argument('--debug', '-d', action='store_true', help="Debug") parser.add_argument('--debug', '-d', action='store_true', help="Debug")
parser.add_argument('--verbose', '-v', action='store_true', help="Verbose") parser.add_argument('--verbose', '-v', action='store_true', help="Verbose")
parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph') parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph')
parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)") parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques") parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
parser.add_argument('--comtest', '-k', action='store_true', help="Enable [placeholder] community detection algorithm instead of max-cliques")
args = parser.parse_args() args = parser.parse_args()
if args.output_prefix == "":
args.output_prefix = ".".join(args.barcode_graph.split(".")[:-1]) + "_lcpg"
return args return args
def main(): def main():
...@@ -28,26 +31,10 @@ def main(): ...@@ -28,26 +31,10 @@ def main():
# debug = args.debug # debug = args.debug
filename = args.barcode_graph filename = args.barcode_graph
barcode_graph = nx.read_gexf(filename)
def dprint(s):
from datetime import datetime
t = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(t, s)
dprint("loading barcode graph")
if filename.endswith('.gexf'):
G = nx.read_gexf(filename)
elif filename.endswith('.graphml'):
G = nx.read_graphml(filename)
else:
print("Input file must be gexf or graphml formatted", file=sys.stderr)
exit(1)
dprint("barcode graph loaded")
if args.louvain: if args.louvain:
clique_mode = "louvain" clique_mode = "louvain"
elif args.comtest:
clique_mode = "testing"
else: else:
clique_mode = None clique_mode = None
...@@ -62,15 +49,15 @@ def main(): ...@@ -62,15 +49,15 @@ def main():
shutil.rmtree(debug_path) shutil.rmtree(debug_path)
os.mkdir(debug_path) os.mkdir(debug_path)
# Index size must be changed for general purpose. 8 is good for d=5 d2g = d2.D2Graph(debug=debug, debug_path=debug_path)
dprint("creating D2graph object") d2g.construct_from_barcodes(
d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path) barcode_graph,
dprint("D2 graph object created") neighbor_threshold=args.edge_divergence_threshold,
dprint("constructing d2 graph from barcode graph") clique_mode=clique_mode,
d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads, verbose=args.verbose) threads=args.threads,
dprint("[debug] d2 graph constructed") verbose=args.verbose
)
# d2g.save(f"{args.output_prefix}.tsv")
nx.write_gexf(d2g, f"{args.output_prefix}.gexf") nx.write_gexf(d2g, f"{args.output_prefix}.gexf")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment