Commit c880bb1d authored by rchikhi
Commit message: augmentations to remove bad nodes/edges from d2-graph

parent 264169f4
......@@ -451,7 +451,7 @@ def verify_graph_edges(d2_component):
color = 'red'
data['color'] = color
# also, annotate nodes by their putative molecule found
for n, data in d2_component.nodes(data=True):
......@@ -460,8 +460,35 @@ def verify_graph_edges(d2_component):
node_udg_molecules = udg_molecules_dict[head[0]]
data['udg_molecule']= '_'.join(list(map(str,node_udg_molecules)))
# aggressive: delete nodes that have either no udg_molecule found or two udg_molecules.
# It turns out this is not a good strategy on its own, because nodes with two udg_molecules are important for connecting portions of the graph,
# so try keeping those nodes whose two adjacent molecules are close together.
if True:
d2_component = d2_component.copy()
nodes_to_remove = []
for n, data in d2_component.nodes(data=True):
# Parse the current node name
head, c1, c2 = parse_dg_name(n)
if "_" in data['udg_molecule'] or data['udg_molecule'] == '':
if "_" in data['udg_molecule']:
m1, m2 = list(map(int,data['udg_molecule'].split("_")))
if abs(m2-m1) < 30: continue # don't remove that kind of nodes
nodes_to_remove += [n]
print("removed",len(nodes_to_remove),"bad nodes")
# aggressive: delete red edges
if True:
d2_component = d2_component.copy()
edges_to_remove = []
for n1, n2, data in d2_component.edges(data=True):
if data['color'] == 'red':
edges_to_remove += [(n1,n2)]
print("removed",len(edges_to_remove),"bad edges")
return d2_component
def main():
args = parse_args()
......@@ -491,7 +518,7 @@ def main():
components.sort(key=lambda x: -len(x))
component = graph.subgraph(components[0])
component = verify_graph_edges(component)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment