Skip to content
Snippets Groups Projects
Commit 566d08b3 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

starting an evaluation/debug script

parent ae5147d9
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
import sys
import argparse
from termcolor import colored
import networkx as nx
def parse_args(argv=None):
    """ Parse the command line of the evaluation script.
    @param argv Optional list of argument strings to parse. When None (the default), argparse reads the arguments from sys.argv.
    @return The argparse namespace; its `filename` attribute holds the path of the graph file to evaluate.
    """
    parser = argparse.ArgumentParser(
        description='Evaluate a graph output file (graphml or gexf format).')
    parser.add_argument('filename', type=str,
                        help='The output file to evaluate')
    return parser.parse_args(argv)
def load_graph(filename):
    """ Load a networkx graph from a graphml or gexf file.
    @param filename Path of the graph file. The reader is selected from the file extension (.graphml or .gexf).
    @return The graph returned by the matching networkx reader.
    @raise SystemExit With exit status 1 when the extension is not supported.
    """
    if filename.endswith('.graphml'):
        return nx.read_graphml(filename)
    if filename.endswith('.gexf'):
        return nx.read_gexf(filename)

    print("Wrong file format. Require graphml or gexf format", file=sys.stderr)
    # The bare `exit` helper is only injected by the site module and exits
    # with status 0; sys.exit(1) is always available and reports the failure.
    sys.exit(1)
""" Compute appearance frequencies from node names.
All the node names must be under the format:
{idx}:{mol1_id}_{mol2_id}_...{molx_id}.other_things_here
@param graph The networkx graph representing the deconvolved graph
@param only_wrong If True, don't print correct nodes
@param file_pointer Where to print the output. If set to stdout, then pretty print. If set to None, don't print anything.
@return A tuple containing two dictionaries. The first one with theoretical frequencies of each node, the second one with observed frequencies.
"""
def parse_graph_frequencies(graph, only_wrong=False, file_pointer=sys.stdout):
    """ Compute appearance frequencies from node names.
    All the node names must be under the format:
    {idx}:{mol1_id}_{mol2_id}_...{molx_id}.other_things_here
    @param graph Object whose nodes() method yields the node names (strings) of the graph to analyse
    @param only_wrong If True, don't print the nodes whose observed frequency equals the theoretical one
    @param file_pointer Where to print the output. If it is sys.stdout, the lines are coloured (green when correct, red otherwise). If set to None, nothing is printed.
    @return A tuple of two dictionaries keyed by origin name: first the theoretical frequencies, then the observed frequencies.
    @raise ValueError If an origin name does not contain exactly one ':'.
    """
    # Count how many nodes share each origin name `{idx}:{mol1_id}_{mol2_id}_...`
    observed_frequences = {}
    for node in graph.nodes():
        # Keep everything before the first dot. partition() returns the whole
        # name when there is no dot, whereas slicing with the -1 returned by
        # str.find would silently drop the last character.
        origin_name = node.partition(".")[0]
        observed_frequences[origin_name] = observed_frequences.get(origin_name, 0) + 1

    # Compute wanted frequencies, in order of first appearance
    theoritical_frequencies = {}
    for node_name in observed_frequences:
        _, composition = node_name.split(':')
        # The node should be split into the number of molecules inside itself
        theoritical_frequencies[node_name] = len(composition.split('_'))

    # Print results
    if file_pointer is not None:
        print("--- Frequency analysis ---", file=file_pointer)
        use_colors = file_pointer is sys.stdout
        for key, the in theoritical_frequencies.items():
            obs = observed_frequences[key]
            is_correct = obs == the
            if only_wrong and is_correct:
                continue
            result = f"{key}: {obs}/{the}"
            if use_colors:
                result = colored(result, 'green' if is_correct else 'red')
            print(result, file=file_pointer)

    return theoritical_frequencies, observed_frequences
def print_summary(frequencies, file_pointer=sys.stdout):
    """ Print the global summary section.
    Only the section header is written for now; `frequencies` is accepted but not used yet.
    @param frequencies The frequency data to summarise (currently unused)
    @param file_pointer Stream the header is printed to
    """
    header = "--- Global summary ---"
    print(header, file=file_pointer)
def main():
    """ Script entry point.
    Loads the graph named on the command line, prints its frequency analysis, then prints the global summary.
    """
    filename = parse_args().filename
    frequencies = parse_graph_frequencies(load_graph(filename))
    print_summary(frequencies)
# Run the evaluation only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
networkx>=2.2
\ No newline at end of file
networkx>=2.2
termcolor>=1.1
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment