Commit 264169f4 authored by rchikhi's avatar rchikhi
Browse files

annotation of d2-graphs given ground truth

parent 4e175f2c
......@@ -13,8 +13,8 @@ def parse_args():
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('filename', type=str,
help='The file to evalute')
parser.add_argument('--type', '-t', choices=["d2", "path"], default="path", required=True,
help="Define the data type to evaluate. Must be 'd2' or 'path'.")
parser.add_argument('--type', '-t', choices=["d2", "path", "d2-2annotate"], default="path", required=True,
help="Define the data type to evaluate. Must be 'd2' or 'path' or 'd2-2annotate' (Rayan's hack).")
parser.add_argument('--light-print', '-l', action='store_true',
help='Print only wrong nodes and paths')
parser.add_argument('--optimization_file', '-o',
......@@ -33,6 +33,17 @@ def load_graph(filename):
print("Wrong file format. Require graphml or gefx format", file=sys.stderr)
exit()
def save_graph(g, filename):
if filename.endswith('.graphml'):
return nx.write_graphml(g,filename)
elif filename.endswith('.gexf'):
return nx.write_gexf(g,filename)
else:
print("Wrong file format. Require graphml or gefx format", file=sys.stderr)
exit()
# ------------- Path Graph -------------
......@@ -378,6 +389,79 @@ def compute_covered_variables(optimization_file, path):
return vars
# returns True iff there exist x in mol1 such that there exists y in mol2 and |x-y| <= some_value
def nearby_udg_molecules(mols1, mols2):
for x in mols1:
for y in mols2:
if abs(x-y) <= 5:
return True
return False
def verify_graph_edges(d2_component):
udg_molecules_dict=dict()
for node in d2_component.nodes():
# Parse the current node name
head, c1, c2 = parse_dg_name(node)
# Construct the molecule(s) that this udg really 'reflects'
# i.e. the udg has a central node and two cliques
# that central nodes is the result of merging of several molecules
# ideally, only one of them is connected to the cliques
# (there could be more than one though; in that case the udg is 'ambiguous')
# udg_molecules aims to reflect the molecule(s) underlying this udg
udg_molecules = set()
# Get the current molecule idxs of central node
molecule_idxs = mols_from_node(head[1])
#print("mol idxs", molecule_idxs)
# Examine molecule idx's of cliques to see which are close to the central node
# rationale: c1/c2 contain nearby molecule id's
for mol_idx in molecule_idxs:
nexts = []
for c in [c1,c2]:
for c_node in c:
nei_mols = mols_from_node(c_node.split()[0])
nei_mols = [x for x in nei_mols if x > mol_idx] # fixme: also look at the <= molecules for more robustness
# If there are molecule next
if len(nei_mols) > 0:
next_nei_mol = min(nei_mols)
nexts.append((next_nei_mol))
nexts.sort(key=lambda x: x)
quality = sum([1.0/x if mol_idx+x in nexts else 0 for x in range(1,6)]) / sum([1.0/x for x in range(1,6)])
if quality > 0.6:
udg_molecules.add(mol_idx)
#print("mol",mol_idx,molecule_idxs,"quality",quality,"nexts",nexts)
udg_molecules_dict[head[0]]=udg_molecules
# Then: annotate edges as to whether they're real (their udg_molecule(s) are nearby) or fake
for n1, n2 , data in d2_component.edges(data=True):
# Parse the current node name
head, c1, c2 = parse_dg_name(n1)
node_udg_molecules = udg_molecules_dict[head[0]]
n_head, n_c1, n_c2 = parse_dg_name(n2)
neighbor_udg_molecules = udg_molecules_dict[n_head[0]]
if nearby_udg_molecules(node_udg_molecules, neighbor_udg_molecules):
color = 'green'
else:
color = 'red'
data['color'] = color
print("edge",node_udg_molecules,neighbor_udg_molecules,color)
# also, annotate nodes by their putative molecule found
for n, data in d2_component.nodes(data=True):
# Parse the current node name
head, c1, c2 = parse_dg_name(n)
node_udg_molecules = udg_molecules_dict[head[0]]
data['udg_molecule']= '_'.join(list(map(str,node_udg_molecules)))
def main():
args = parse_args()
......@@ -398,7 +482,20 @@ def main():
if args.optimization_file and len(args.optimization_file) > 0:
covered_vars = compute_covered_variables(args.optimization_file, longest_path)
print_d2_summary(components, longest_path, covered_vars=covered_vars, light_print=args.light_print)
# added by Rayan
# example:
# python evaluate.py --type d2-2annotate ~/Dropbox/cnrs/projects/10x-barcodes/yoann_to_cedric_ilp/d2_graph.gexf
elif args.type == "d2-2annotate":
components = list(nx.connected_components(graph))
components.sort(key=lambda x: -len(x))
component = graph.subgraph(components[0])
verify_graph_edges(component)
extension=args.filename.split('.')[-1]
base_filename='.'.join(args.filename.split('.')[:-1])
save_graph(component,base_filename+".verified."+extension)
if __name__ == "__main__":
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment