# clique_graph_eval.py
import argparse
import time
import networkx as nx
from collections import Counter

from experiments.CliqueGraph import CliqueGraph
from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory


def parse_arguments():
    """Parse the command line of the script.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its only attribute
        is ``barcode_graph``, the positional path given on the command line.
    """
    cli = argparse.ArgumentParser(description="Tests on graph barcode")
    cli.add_argument(
        'barcode_graph',
        help='The barcode graph file. Must be a gexf formatted file.',
    )
    return cli.parse_args()


def is_continuous(barcode_multiset):
    """Tell whether a barcode multiset covers a run of consecutive molecule ids.

    Every molecule id found in the barcodes is mapped back to the barcode it
    comes from, the ids are sorted, and a window as wide as the total count
    of the multiset is slid over them. A window is accepted when its ids are
    consecutive integers and the barcodes owning those ids occur exactly as
    many times as they do in ``barcode_multiset``.

    Args:
        barcode_multiset: Mapping (typically a ``collections.Counter``) from
            a barcode to its multiplicity. Each barcode must be hashable and
            iterable over integer molecule ids, e.g. a tuple of ints.

    Returns:
        ``True`` if at least one window matches, ``False`` otherwise. A
        multiset whose total count is not positive yields ``False``.
    """
    # Save barcode provenance. A molecule id present in several barcodes
    # keeps the last barcode visited.
    originated_barcode = {}
    for barcode in barcode_multiset:
        for molecule in barcode:
            originated_barcode[molecule] = barcode

    nb_barcode = sum(barcode_multiset.values())
    if nb_barcode <= 0:
        # Nothing to place in a window; this also keeps the index arithmetic
        # below away from an empty molecule list.
        return False

    ordered_molecules = sorted(originated_barcode)

    # Slide a window of nb_barcode molecules over the sorted ids.
    for idx in range(nb_barcode, len(ordered_molecules) + 1):
        first_idx = idx - nb_barcode
        last_idx = idx - 1

        # Ids are distinct and sorted, so the window is made of consecutive
        # integers exactly when its two ends are nb_barcode - 1 apart.
        if ordered_molecules[first_idx] + nb_barcode - 1 != ordered_molecules[last_idx]:
            continue

        # Count how often each barcode owns a molecule of the window. The
        # window holds nb_barcode ids, so matching every involved barcode's
        # multiplicity is enough to match the whole multiset.
        involved = Counter(
            originated_barcode[molecule]
            for molecule in ordered_molecules[first_idx:idx]
        )
        if all(barcode_multiset[key] == val for key, val in involved.items()):
            return True

    return False


def iterable_to_barcode_multiset(clique):
    """Convert barcode names into a multiset of molecule-id tuples.

    For each name, the second ``":"``-separated field is split on ``"_"`` and
    every piece is parsed as an integer, e.g. ``"0:1_2"`` gives ``(1, 2)``.
    The ids are stored as tuples so that identical barcodes hash alike and
    are merged by the counter, and so that each barcode can be iterated more
    than once.

    Args:
        clique: Iterable of barcode name strings.

    Returns:
        A ``collections.Counter`` mapping each tuple of molecule ids to the
        number of names that produced it.

    Raises:
        IndexError: If a name contains no ``":"``.
        ValueError: If an id field is not a valid integer.
    """
    return Counter(
        tuple(int(x) for x in b.split(":")[1].split("_"))
        for b in clique
    )


def analyse_clique_graph(barcode_graph):
    """Count the continuous cliques of the clique graph of ``barcode_graph``.

    A ``CliqueGraph`` is built from the barcode graph; each of its nodes is
    turned into a barcode multiset and tested with ``is_continuous``.

    Args:
        barcode_graph: The barcode graph handed to ``CliqueGraph``.

    Returns:
        A ``(continuous, total)`` pair: the number of nodes whose multiset is
        continuous and the number of nodes of the clique graph.
    """
    clique_graph = CliqueGraph(barcode_graph)

    # Each node is converted to a barcode multiset, then checked.
    nb_continuous = sum(
        1
        for node in clique_graph.nodes()
        if is_continuous(iterable_to_barcode_multiset(node))
    )

    return nb_continuous, len(clique_graph.nodes())


def analyse_d_graphs(barcode_graph):
    """Count the continuous d-graphs (udgs) generated from ``barcode_graph``.

    The udgs are generated per node by a ``CliqueDGFactory``, duplicates are
    merged, and each distinct udg is tested with ``is_continuous``.

    Args:
        barcode_graph: The barcode graph handed to ``CliqueDGFactory``.

    Returns:
        A ``(continuous, total)`` pair: the number of distinct udgs whose
        barcode multiset is continuous and the number of distinct udgs.
    """
    # Generate udgs.
    # NOTE(review): the meaning of the second factory argument (1) is not
    # visible from this file -- confirm against CliqueDGFactory.
    factory = CliqueDGFactory(barcode_graph, 1)
    udg_per_node = factory.generate_all_dgraphs(threads=1)

    # Remove duplicate udgs: the sorted barcode list is frozen into a tuple so
    # that two udgs with the same content share one dictionary key.
    udgs = {}
    for udg_node_lst in udg_per_node.values():
        for udg in udg_node_lst:
            barcodes = tuple(udg.to_sorted_list())
            udgs[barcodes] = iterable_to_barcode_multiset(barcodes)

    continuous = sum(1 for bms in udgs.values() if is_continuous(bms))

    return continuous, len(udgs)


def main():
    """Run both analyses on the graph named on the command line.

    The gexf file is loaded with networkx, then the clique-graph analysis and
    the d-graph analysis are run in turn. For each one, a label with the
    elapsed seconds is printed, followed by ``<continuous> / <total>``.
    """
    args = parse_arguments()
    g = nx.read_gexf(args.barcode_graph)

    # perf_counter is used for the intervals: unlike time.time() it is not
    # affected by system clock adjustments.
    start = time.perf_counter()
    continuous, total = analyse_clique_graph(g)
    print("cliques", time.perf_counter() - start)
    print(continuous, "/", total)

    start = time.perf_counter()
    continuous, total = analyse_d_graphs(g)
    print("udgs", time.perf_counter() - start)
    print(continuous, "/", total)



if __name__ == "__main__":
    # To profile a run, replace the call below with:
    #   import cProfile
    #   cProfile.run('main()')
    main()