#!/usr/bin/env python3
"""Evaluate a deconvolved graph by comparing node frequencies.

Every node of the evaluated graph is expected to be named
`{idx}:{mol1_id}_{mol2_id}_...{molx_id}.other_things_here`. The part before
the first dot identifies the origin node; an origin node made of x molecules
is expected to appear x times in the graph.

Third-party requirements (requirements.txt): networkx>=2.2, termcolor>=1.1
"""

import argparse
import sys
from collections import Counter

try:
    from termcolor import colored
except ImportError:
    # Colouring is purely cosmetic: fall back to plain text so that the
    # analysis itself keeps working when termcolor is not installed.
    def colored(text, *_args, **_kwargs):
        return text


SUPPORTED_EXTENSIONS = ('.graphml', '.gexf')


def parse_args(argv=None):
    """ Parse the command line of the evaluation script.
        @param argv The argument list to parse. If None, argparse reads sys.argv[1:].
        @return The argparse namespace; its `filename` attribute is the path of the file to evaluate.
    """
    parser = argparse.ArgumentParser(
        description='Evaluate a deconvolved graph from the frequencies of its node names.')
    parser.add_argument('filename', type=str,
                        help='The output file to evaluate')

    return parser.parse_args(argv)


def load_graph(filename):
    """ Load a graph from a graphml or gexf file.
        The format is chosen from the file extension. Any other extension prints
        an error on stderr and terminates the program with exit status 1.
        @param filename Path of the graph file, ending with `.graphml` or `.gexf`
        @return The graph returned by the matching networkx reader
    """
    if not filename.endswith(SUPPORTED_EXTENSIONS):
        print("Wrong file format. Require graphml or gexf format", file=sys.stderr)
        # A non-zero status lets calling scripts detect the failure.
        sys.exit(1)

    # Imported here rather than at module level: only the file loading needs
    # networkx, the frequency analysis just calls `graph.nodes()`.
    import networkx as nx

    if filename.endswith('.graphml'):
        return nx.read_graphml(filename)
    return nx.read_gexf(filename)


def _origin_name(node):
    """ Return the `{idx}:{mol1_id}_{mol2_id}_...` prefix of a node name. """
    # partition keeps the whole name when it contains no dot, whereas slicing
    # on the -1 returned by str.find would silently drop the last character.
    return node.partition('.')[0]


def _expected_frequency(origin_name):
    """ Return the number of molecule ids listed in an origin node name. """
    if origin_name.count(':') != 1:
        raise ValueError(
            f"Malformed node name {origin_name!r}: "
            "expected the format {idx}:{mol1_id}_{mol2_id}_...")

    composition = origin_name.partition(':')[2]
    # The node should be split into as many nodes as it contains molecules.
    return len(composition.split('_'))


def parse_graph_frequencies(graph, only_wrong=False, file_pointer=sys.stdout):
    """ Compute appearance frequencies from node names.
        All the node names must be under the format :
        {idx}:{mol1_id}_{mol2_id}_...{molx_id}.other_things_here
        @param graph The networkx graph representing the deconvolved graph
        @param only_wrong If True, don't print correct nodes
        @param file_pointer Where to print the output. If set to stdout, then pretty print. If set to None, don't print anything.
        @return A tuple containing two dictionaries. The first one with theoretical frequencies of each node, the second one with observed frequencies.
        @raise ValueError If an origin node name does not contain exactly one ':'
    """
    # Observed frequency: how many graph nodes share the same origin name.
    observed = dict(Counter(_origin_name(node) for node in graph.nodes()))

    # Wanted frequency: one node per molecule listed in the origin name.
    theoretical = {name: _expected_frequency(name) for name in observed}

    if file_pointer is not None:
        print("--- Frequency analysis ---", file=file_pointer)
        for name, expected in theoretical.items():
            found = observed[name]
            is_correct = found == expected
            if only_wrong and is_correct:
                continue

            line = f"{name}: {found}/{expected}"
            # Colour codes are only emitted when writing to the terminal stream.
            if file_pointer is sys.stdout:
                line = colored(line, 'green' if is_correct else 'red')

            print(line, file=file_pointer)

    return theoretical, observed


def print_summary(frequencies, file_pointer=sys.stdout):
    """ Print the global summary section.
        Only the section header is written for now; `frequencies` is accepted
        but not used yet.
        @param frequencies The value returned by parse_graph_frequencies
        @param file_pointer Where to print the output
    """
    print("--- Global summary ---", file=file_pointer)


def main(argv=None):
    """ Load the graph named on the command line and print its evaluation.
        @param argv The argument list to parse. If None, argparse reads sys.argv[1:].
    """
    args = parse_args(argv)
    graph = load_graph(args.filename)
    frequencies = parse_graph_frequencies(graph)

    print_summary(frequencies)


if __name__ == "__main__":
    main()