Commit 2c22fc66 authored by Yoann Dufresne
Browse files

d2 graph basic analysis completed

parent 17971a94
......@@ -2,6 +2,7 @@
import sys
import csv
import argparse
from termcolor import colored
......@@ -16,6 +17,8 @@ def parse_args():
help="Define the data type to evaluate. Must be 'd2' or 'path'.")
parser.add_argument('--light-print', '-l', action='store_true',
help='Print only wrong nodes and paths')
parser.add_argument('--optimization_file', '-o',
help="If the main file is a d2, a file formated for optimization can be set. This file will be used to compute the coverage of the longest path on the barcode graph.")
args = parser.parse_args()
return args
......@@ -172,13 +175,52 @@ def parse_dg_name(name):
return (idx, central, score), h1, h2
def print_d2_summary(connected_components, light_print=False):
def path_to_jumps(path):
    """Group a path into runs of consecutive molecule numbers.

    Args:
        path: iterable of ``(molecule, node)`` pairs. Only the molecule
            number is used; the node is ignored.

    Returns:
        A list of ``(first_molecule, last_molecule)`` tuples, one per run.
        A new run starts only when a molecule number exceeds the previous
        one by more than 1. An empty path yields an empty list.
    """
    chunks = []
    run_start = None
    previous = None

    for mol, _node in path:
        if previous is None:
            # First entry: open the first run. No sentinel value is used, so
            # any molecule number (including very negative ones) is handled.
            run_start = mol
        elif mol > previous + 1:
            # Gap detected: close the current run and open a new one.
            chunks.append((run_start, previous))
            run_start = mol
        previous = mol

    # Close the last run, if the path contained at least one entry.
    if previous is not None:
        chunks.append((run_start, previous))

    return chunks
def print_d2_summary(connected_components, longest_path, covered_vars={}, light_print=False):
print("--- Global summary ---")
print(f"Number of connected components: {len(connected_components)}")
print(f"Total number of nodes: {sum([len(x) for x in connected_components])}")
print(f"The 5 largest components: {[len(x) for x in connected_components][:5]}")
print("--- Largest component analysis ---")
# Get the list of node idx
path_dg_idx = [int(x[1].split(" ")[0]) for x in longest_path]
# print("\n".join(longest_path))
if not light_print:
print("Longest path for increasing molecule number:")
print(f"Size of the longest path: {len(longest_path)}")
print("Jumps in central nodes:")
print(f"Number of optimization variable coverage: {len(covered_vars)}")
nb_true = 0
falses = []
for idx, val in covered_vars.items():
if val:
nb_true += 1
print(f"Coverage: {nb_true}/{len(covered_vars)}")
......@@ -266,8 +308,7 @@ def backtrack_longest_path(node, molecule, longest_paths, path=[]):
if node == None:
return path
print(node, molecule)
path.append((molecule, node))
length, next_node, next_mol = longest_paths[node][molecule]
return backtrack_longest_path(next_node, next_mol, longest_paths, path)
......@@ -300,6 +341,42 @@ def recursive_longest_path(current_node, current_molecule, next_nodes, longest_p
return longest_paths[current_node][current_molecule]
def compute_covered_variables(optimization_file, path):
    """Mark which optimization variables are covered by the nodes of a path.

    The file is read as whitespace-separated integers. Its first line holds
    at least two values, read here as ``nb_nodes`` and ``nb_vars``. Every
    later line whose 0-based index (counted after the header) is
    ``>= nb_nodes`` is parsed as ``<node_idx> <var_idx> <var_idx> ...``.

    Args:
        optimization_file: path of the optimization file to read.
        path: iterable of entries whose element ``[1]`` is a node name
            starting with the node index followed by a space.

    Returns:
        A dict mapping each variable index in ``range(nb_vars)`` to ``True``
        if a node of ``path`` is assigned to it, ``False`` otherwise.

    Raises:
        ValueError: if the header is malformed or a value is not an integer.
        KeyError: if a node of ``path`` has no assignment line in the file.
    """
    var_assignments = {}

    # Read optimization variables
    with open(optimization_file) as opt_file:
        # split() without argument tolerates trailing spaces and the newline.
        header = [int(x) for x in opt_file.readline().split()]
        if len(header) < 2:
            raise ValueError(
                f"Malformed header in optimization file {optimization_file!r}"
            )
        nb_nodes, nb_vars = header[0], header[1]
        covered = {var_idx: False for var_idx in range(nb_vars)}

        for idx, line in enumerate(opt_file):
            # NOTE(review): the previous comment here read "Stop at the end
            # of nodes", but the condition as written only parses lines at
            # index >= nb_nodes (i.e. it skips the first nb_nodes lines).
            # Behaviour is kept as written; confirm against the file format.
            if idx < nb_nodes:
                continue
            parsed = [int(x) for x in line.split()]
            if not parsed:
                # Blank line: nothing to record.
                continue
            var_assignments[parsed[0]] = parsed[1:]

    # Read the path to cover the variables
    for node in path:
        node_idx = int(node[1].split(" ")[0])
        for var_idx in var_assignments[node_idx]:
            covered[var_idx] = True

    return covered
def main():
args = parse_args()
graph = load_graph(args.filename)
......@@ -315,7 +392,10 @@ def main():
component = graph.subgraph(components[0])
longest_path = compute_longest_increasing_paths(component)
print_d2_summary(components, longest_path, light_print=args.light_print)
covered_vars = {}
if args.optimization_file and len(args.optimization_file) > 0:
covered_vars = compute_covered_variables(args.optimization_file, longest_path)
print_d2_summary(components, longest_path, covered_vars=covered_vars, light_print=args.light_print)
if __name__ == "__main__":
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment