Commit 2025c898 authored by Yoann Dufresne

rename d2g to lcpg

parent cb354bcb
......@@ -10,12 +10,12 @@ from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
class D2Graph(nx.Graph):
"""D2Graph (read it (d-graph)²)"""
class LcpGraph(nx.Graph):
"""LcpGraph"""
def __init__(self, debug=False, debug_path='.'):
super(D2Graph, self).__init__()
self.all_d_graphs = []
self.d_graphs_per_node = {}
super(LcpGraph, self).__init__()
self.all_lcp = []
self.lcp_per_node = {}
self.node_by_idx = {}
self.barcode_graph = None
self.index = None
......@@ -35,7 +35,7 @@ class D2Graph(nx.Graph):
def subgraph(self, nodes):
nodes = frozenset(nodes)
G = D2Graph(self.barcode_graph)
G = LcpGraph(self.barcode_graph)
G.barcode_edge_idxs = self.barcode_edge_idxs
# Add sub-nodes
......@@ -75,20 +75,20 @@ class D2Graph(nx.Graph):
# Compute all the d-graphs
if verbose:
print("Computing the unit d-graphs..")
dg_factory = None
lcp_factory = None
if clique_mode == "louvain":
dg_factory = LouvainDGFactory(self.barcode_graph)
lcp_factory = LouvainDGFactory(self.barcode_graph)
else:
dg_factory = CliqueDGFactory(self.barcode_graph, min_size_clique=min_size_clique, debug=self.debug, debug_path=self.debug_path)
self.d_graphs_per_node = dg_factory.generate_all_dgraphs(threads=threads, verbose=verbose)
lcp_factory = CliqueDGFactory(self.barcode_graph, min_size_clique=min_size_clique, debug=self.debug, debug_path=self.debug_path)
self.lcp_per_node = lcp_factory.generate_all_dgraphs(threads=threads, verbose=verbose)
if verbose:
counts = sum(len(x) for x in self.d_graphs_per_node.values())
counts = sum(len(x) for x in self.lcp_per_node.values())
print(f"\t {counts} computed d-graphs")
for d_graphs in self.d_graphs_per_node.values():
self.all_d_graphs.extend(d_graphs)
for d_graphs in self.lcp_per_node.values():
self.all_lcp.extend(d_graphs)
# Number the d_graphs
for idx, d_graph in enumerate(self.all_d_graphs):
for idx, d_graph in enumerate(self.all_lcp):
d_graph.idx = idx
self.node_by_idx[idx] = d_graph
......@@ -133,7 +133,7 @@ class D2Graph(nx.Graph):
dg = Dgraph.load(data["udg"], data["score"], data["barcode_edges"])
self.variables.update(dg.edges)
self.bidict_nodes[node] = dg
self.all_d_graphs.append(dg)
self.all_lcp.append(dg)
if dg.idx == -1:
dg.idx = int(node)
self.node_by_idx[dg.idx] = dg
......@@ -141,59 +141,33 @@ class D2Graph(nx.Graph):
self.bidict_nodes = bidict(self.bidict_nodes)
def create_index_from_tuples(self, tuple_size=3, verbose=True):
index = {}
if verbose:
print("\tIndex d-graphs")
for lst_idx, dg in enumerate(self.all_d_graphs):
if verbose:
sys.stdout.write(f"\r\t{lst_idx+1}/{len(self.all_d_graphs)}")
sys.stdout.flush()
nodelist = dg.to_sorted_list()
if len(nodelist) < tuple_size:
continue
# Generate all tuplesize-mers
for dmer in itertools.combinations(nodelist, tuple_size):
if dmer not in index:
index[dmer] = set()
index[dmer].add(dg)
if verbose:
print()
return index
def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
nodes = {}
# Create the nodes of d2g from udgs
for dg in self.all_d_graphs:
nodes[dg] = dg.idx
self.add_node(nodes[dg])
for lcp in self.all_lcp:
nodes[lcp] = lcp.idx
self.add_node(nodes[lcp])
# Add covering barcode edges
barcode_edges = " ".join([str(x) for x in dg.edges])
self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
self.nodes[nodes[dg]]["udg"] = str(dg)
self.nodes[nodes[dg]]["central_node_barcode"] = str(dg.center)
barcode_edges = " ".join([str(x) for x in lcp.edges])
self.nodes[nodes[lcp]]["barcode_edges"] = barcode_edges
self.nodes[nodes[lcp]]["score"] = f"{lcp.score}/{lcp.get_optimal_score()}"
self.nodes[nodes[lcp]]["udg"] = str(lcp)
self.nodes[nodes[lcp]]["central_node_barcode"] = str(lcp.center)
# Create the edges from neighbor edges
for dg in self.all_d_graphs:
for node in dg.to_node_set():
if node == dg.center:
for lcp in self.all_lcp:
for node in lcp.to_node_set():
if node == lcp.center:
continue
entry = frozenset({node})
if entry in self.d_graphs_per_node:
colliding_dgs = self.d_graphs_per_node[entry]
for colliding_dg in colliding_dgs:
distance = dg.distance_to(colliding_dg)
distance_ratio = distance / (len(dg.nodes) + len(colliding_dg.nodes))
if entry in self.lcp_per_node:
colliding_dgs = self.lcp_per_node[entry]
for colliding_lcp in colliding_dgs:
distance = lcp.distance_to(colliding_lcp)
distance_ratio = distance / (len(lcp.nodes) + len(colliding_lcp.nodes))
if distance_ratio <= neighborhood_threshold:
self.add_edge(nodes[dg], nodes[colliding_dg], distance=distance)
self.add_edge(nodes[lcp], nodes[colliding_lcp], distance=distance)
# Filter out singletons
graph_nodes = list(nodes)
......@@ -203,45 +177,3 @@ class D2Graph(nx.Graph):
del nodes[n]
return bidict(nodes)
def create_graph_from_index(self):
nodes = {}
for dmer in self.index:
dgs = list(set(self.index[dmer]))
for d_idx, dg in enumerate(dgs):
# Create a node name
if dg not in nodes:
nodes[dg] = dg.idx
# Add the node
self.add_node(nodes[dg])
# Add covering barcode edges
barcode_edges = " ".join([str(self.barcode_edge_idxs[x]) for x in dg.edges])
self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
self.nodes[nodes[dg]]["udg"] = str(dg)
# Add the edges
for prev_idx in range(d_idx):
prev_dg = dgs[prev_idx]
# Add on small distances
d = dg.distance_to(prev_dg)
if d <= min(len(dg.node_set)/2, len(prev_dg.node_set)/2):
self.add_edge(nodes[dg], nodes[prev_dg], distance=d)
return bidict(nodes)
def compute_distances(self):
for x, y, data in self.edges(data=True):
dg1 = self.node_by_idx[x]
dg2 = self.node_by_idx[y]
if dg1 == dg2:
continue
# Distance computing and adding in the dist dicts
d = dg1.distance_to(dg2)
data["distance"] = d
import networkx as nx
# from deconvolution.d2graph.d2_path import Unitig
# from deconvolution.lcpgraph.d2_path import Unitig
""" Remove unnecessary transitions
......
from deconvolution.d2graph.d2_path import Path
from deconvolution.lcpgraph.lcp_path import Path
""" Greedy algorithm. Start with the most probable unitig (i.e. lowest normalized penalty first, and largest unitig for
equalities). Then extend on both sides to the nearest interesting unitig.
......
import random
from collections import Counter
from deconvolution.d2graph.d2_path import Path
from deconvolution.lcpgraph.lcp_path import Path
class Optimizer:
......
......@@ -4,8 +4,8 @@ import networkx as nx
import argparse
import sys
from deconvolution.d2graph import d2_graph as d2
from deconvolution.d2graph import d2_algorithms as d2a
from deconvolution.lcpgraph import lcp_graph as d2
from deconvolution.lcpgraph import lcpg_algorithms as d2a
def parse_arguments():
......@@ -33,7 +33,7 @@ def main():
# Loading
print("--- lcp graph loading ---")
lcpg = d2.D2Graph()
lcpg = d2.LcpGraph()
lcpg.load(lcpg_name)
# Algorithms for reduction
......
......@@ -4,7 +4,7 @@ import networkx as nx
import argparse
import sys
from deconvolution.d2graph import d2_graph as d2, path_optimization as po
from deconvolution.lcpgraph import lcp_graph as d2, path_optimization as po
def parse_arguments():
......@@ -31,7 +31,7 @@ def main():
exit(1)
# Loading
lcpg = d2.D2Graph()
lcpg = d2.LcpGraph()
lcpg.load(lcpg_name)
# Take the principal component
......
......@@ -4,8 +4,8 @@ import networkx as nx
import argparse
import sys
from deconvolution.d2graph import d2_graph as d2, path_optimization as po
from deconvolution.d2graph.d2_path import Path
from deconvolution.lcpgraph import lcp_graph as d2, path_optimization as po
from deconvolution.lcpgraph.lcp_path import Path
def parse_arguments():
......@@ -34,7 +34,7 @@ def main():
# Loading
G = nx.read_gexf(barcode_file)
d2g = d2.D2Graph(G)
d2g = d2.LcpGraph(G)
d2g.load(d2_file)
# Take the principal component
......
......@@ -4,7 +4,7 @@ import networkx as nx
import argparse
import sys
from deconvolution.d2graph import d2_graph as d2
from deconvolution.lcpgraph import lcp_graph as d2
def parse_arguments():
......@@ -49,7 +49,7 @@ def main():
shutil.rmtree(debug_path)
os.mkdir(debug_path)
d2g = d2.D2Graph(debug=debug, debug_path=debug_path)
d2g = d2.LcpGraph(debug=debug, debug_path=debug_path)
d2g.construct_from_barcodes(
barcode_graph,
neighbor_threshold=args.edge_divergence_threshold,
......
......@@ -3,8 +3,8 @@ from distutils.core import setup
setup(
name='10X-deconvolve',
version='0.1dev',
packages=['deconvolution.d2graph', 'deconvolution.dgraph', 'deconvolution.main', 'experiments', 'tests'],
version='0.2dev',
packages=['deconvolution.lcpgraph', 'deconvolution.dgraph', 'deconvolution.main', 'experiments', 'tests'],
license='AGPL V3',
long_description=open('README.md').read(),
)
......@@ -3,7 +3,7 @@ import tempfile
import networkx as nx
from scipy.special import comb
from deconvolution.d2graph.d2_graph import D2Graph
from deconvolution.lcpgraph.lcp_graph import LcpGraph
from deconvolution.dgraph import graph_manipulator as gm
......@@ -14,18 +14,18 @@ class TestD2Graph(unittest.TestCase):
# size = 2 * d + 3
#
# G = gm.generate_d_graph_chain(size, d)
# d2 = D2Graph(G)
# d2 = LcpGraph(G)
# print("before", d)
# d2.construct_from_barcodes(neighbor_threshold=0, min_size_clique=d, verbose=False)
# print("after", d)
#
# # for dg in d2.all_d_graphs:
# # for dg in d2.all_lcp:
# # print(dg.score, dg.get_link_divergence(), dg)
# # print()
#
# # Test the number of d-graphs
# awaited_d_num = size - 2 * d
# self.assertEqual(awaited_d_num, len(d2.all_d_graphs))
# self.assertEqual(awaited_d_num, len(d2.all_lcp))
#
# # Test connectivity
# # Center node names
......@@ -48,14 +48,14 @@ class TestD2Graph(unittest.TestCase):
def test_no_variability(self):
barcode_graph = nx.read_gexf("test_data/bar_1000_5_2.gexf")
d2 = D2Graph(barcode_graph)
d2 = LcpGraph(barcode_graph)
d2.construct_from_barcodes()
udgs = d2.all_d_graphs
udgs = d2.all_lcp
for _ in range(5):
d2 = D2Graph(barcode_graph)
d2 = LcpGraph(barcode_graph)
d2.construct_from_barcodes()
self.assertEqual(len(udgs), len(d2.all_d_graphs))
self.assertEqual(len(udgs), len(d2.all_lcp))
def test_reloading(self):
......@@ -65,7 +65,7 @@ class TestD2Graph(unittest.TestCase):
# Create a d2 graph
G = gm.generate_d_graph_chain(size, d)
d2 = D2Graph(G)
d2 = LcpGraph(G)
d2.construct_from_barcodes(verbose=False)
# Save and reload the d2 in a temporary file
......@@ -74,7 +74,7 @@ class TestD2Graph(unittest.TestCase):
nx.write_gexf(d2, fp.name)
# Reload
d2_reloaded = D2Graph(G)
d2_reloaded = LcpGraph(G)
d2_reloaded.load(fp.name)
# Test the nx graph
......@@ -84,11 +84,11 @@ class TestD2Graph(unittest.TestCase):
# TODO: Verify distances
# Test all_d_graphs
self.assertEqual(len(d2_reloaded.all_d_graphs), len(d2.all_d_graphs))
# Test all_lcp
self.assertEqual(len(d2_reloaded.all_lcp), len(d2.all_lcp))
# Verify dg idxs
reloaded_idxs = [dg.idx for dg in d2_reloaded.all_d_graphs]
for dg in d2.all_d_graphs:
reloaded_idxs = [dg.idx for dg in d2_reloaded.all_lcp]
for dg in d2.all_lcp:
self.assertTrue(dg.idx in reloaded_idxs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment