Commit 5183ab17 authored by Rayan CHIKHI

../deconvolution/main/d2_to_path_simple.py

parent 0f44e223
# Run parameters. Each value falls back to the default given here when the
# corresponding key is absent from the `config` mapping.
WORKDIR = config.get("outdir", "snake_exec")
N = config.get("n", [5000])  # Number of molecules to simulate
D = config.get("d", [5])  # Average coverage of each molecule
M = config.get("m", [3])  # Average number of molecules per barcode
M_DEV = config.get("m_dev", [0])  # Std deviation for merging number
# NOTE(review): `iter` shadows the builtin of the same name; it is kept as is
# because code outside this excerpt may reference it.
iter=1
......
......@@ -173,6 +173,7 @@ class D2Graph(nx.Graph):
self.nodes[nodes[dg]]["barcode_edges"] = barcode_edges
self.nodes[nodes[dg]]["score"] = f"{dg.score}/{dg.get_optimal_score()}"
self.nodes[nodes[dg]]["udg"] = str(dg)
self.nodes[nodes[dg]]["central_node_barcode"] = str(dg).split(']')[0]+']'
# Create the edges from neighbor edges
for dg in self.all_d_graphs:
......
......@@ -81,6 +81,9 @@ class Path(list):
nx.write_gexf(d2p, filename)
# in addition, write the barcode order to a text file
#filename_order_txt = filename[:-len(".gexf")]+".txt"
def save_path_in_graph(self, filename):
d2c = self.d2g.clone()
for idx, udg in enumerate(self):
......
#!/usr/bin/env python3
import networkx as nx
import argparse
import sys
from deconvolution.d2graph import d2_graph as d2, path_optimization as po
def parse_arguments(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``barcode_graph``, ``d2_graph``
        and ``out_prefix``. When no prefix is supplied, ``out_prefix`` is the
        d2 graph path with its final extension removed.
    """
    # Local import: `os` is not among the imports visible at the top of the file.
    import os

    parser = argparse.ArgumentParser(description='Greedy construction of a path through the d2 graph.')
    parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gexf formatted file.')
    parser.add_argument('d2_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
    parser.add_argument('--out_prefix', '-o', default="", help="Output file prefix.")

    args = parser.parse_args(argv)

    if not args.out_prefix:
        # splitext only strips an extension of the final path component, so a
        # dot inside a directory name never truncates the prefix.
        args.out_prefix = os.path.splitext(args.d2_graph)[0]

    return args
def _farthest_node(graph, source):
    """Return a node of *graph* at maximal shortest-path distance from *source*."""
    distances = nx.shortest_path_length(graph, source)
    return max(distances.items(), key=lambda item: item[1])[0]


def main():
    """Build a path between two far-apart nodes of the d2 graph and save it.

    Steps:
      1. Parse and validate the command-line arguments (both inputs must end
         with ``.gexf``; otherwise an error is printed and the process exits
         with status 1).
      2. Load the barcode graph and the d2 graph.
      3. Restrict the d2 graph to its largest connected component.
      4. Pick two extremities by repeatedly jumping to the farthest node.
      5. Take a shortest path between them, register it in a ``po.Solution``,
         print its covering score and call the solution's save methods with
         the output prefix.
    """
    # Parsing the arguments and validate them
    args = parse_arguments()
    barcode_file = args.barcode_graph
    d2_file = args.d2_graph
    if (not barcode_file.endswith('.gexf')) or (not d2_file.endswith(".gexf")):
        print("Inputs file must be gexf formatted", file=sys.stderr)
        sys.exit(1)

    # Loading
    G = nx.read_gexf(barcode_file)
    d2g = d2.D2Graph(G)
    d2g.load(d2_file)

    # Take the principal component
    largest_component_nodes = max(nx.connected_components(d2g), key=len)
    largest_component = d2g.subgraph(largest_component_nodes)

    # Start from an arbitrary node of the component and jump to the node
    # farthest from it; each further jump starts from the previous result so
    # the two retained nodes end up far apart from each other.
    arbitrary_node = next(iter(largest_component.nodes()))
    initial_node = _farthest_node(largest_component, arbitrary_node)
    one_extremity = _farthest_node(largest_component, initial_node)
    other_extremity = _farthest_node(largest_component, one_extremity)

    path = nx.shortest_path(largest_component, one_extremity, other_extremity)

    solution = po.Solution(largest_component)
    # Node identifiers are converted to int to index `node_by_idx`.
    nodes = [largest_component.node_by_idx[int(x)] for x in path]
    solution.add_path(nodes)
    print(f"covering score: {solution.covering_score()}")

    solution.save_path(f"{args.out_prefix}_path.gexf")
    solution.save_barcode_path(f"{args.out_prefix}_barcode_count.gexf")
    print("Solution saved")
# Run the script entry point only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment