graph_manipulator.py 3.79 KB
Newer Older
Yoann Dufresne's avatar
Yoann Dufresne committed
1
import networkx as nx
2
import random
Yoann Dufresne's avatar
Yoann Dufresne committed
3
4

def generate_d_graph_chain(size, d):
5
6
7
8
9
10
    """ Generate a d-graph chain (succession of unit d-graphs).
        If you slice any 2*d+1 part of the graph, it will be a unit d-graph
        :param size The number of nodes in the chain (should not be less than 2*d+1)
        :param d The number of connection on the left and on the right for any node
        :return The d-graph chain
    """
11
    return generate_approx_d_graph_chain(size, d, d)
Yoann Dufresne's avatar
Yoann Dufresne committed
12
13


14
def generate_approx_d_graph_chain(size, d_max, d_avg, size_reduction=0, rnd_seed=-1):
15
16
17
18
19
    """ Generate an almost d-graph chain (succession of unit d-graphs). Almost because they are d-graphs in average
        with a coverage variation.
        :param size The number of nodes in the chain (should not be less than 2*d+1)
        :param d_max The max number of connection on the left and on the right for any node
        :param d_avg The average d value in the graph (ie 2*d average coverage)
20
        :param size_reduction Randomly change the size of the molecule when created. 99% of the molecules will have a size over size*size_reduction
21
22
23
24
        :param rnd_seed Fix the random seed for reproducibility
        :return The d-graph chain
    """
    # Reproducibility
25
    if rnd_seed != -1:
26
27
28
29
30
31
32
33
34
35
        random.seed(rnd_seed)

    # Sample size computation
    sursample_needed = round(size * (2 * d_max - 2 * d_avg) / (4 * d_max))
    total_size = size + sursample_needed

    # Choose which idx to jump over
    to_skip = random.sample(range(total_size), sursample_needed)
    to_skip.sort()

36
37
38
39
    # Init the random size variation
    d_min = d_max*(1-size_reduction)
    std_dev = (d_max-d_min)/2.5

40
41
    # Generate sequence
    G = nx.Graph()
42
    previous_nodes = [None]* d_max * 2
43
44
45
46
47
48
49
50
51
    next_idx = 0
    for idx in range(total_size):
        if len(to_skip) > 0 and to_skip[0] == idx:
            to_skip.pop(0)
            previous_nodes.append(None)
        else:
            # Create the node
            G.add_node(next_idx)

52
53
54
55
56
57
58
            # size deviation computation
            nb_nodes_to_connect = round(random.gauss(d_max, std_dev))
            # Limit the deviation
            nb_nodes_to_connect = max(nb_nodes_to_connect, 1)
            nb_nodes_to_connect = min(nb_nodes_to_connect, 2 * d_max)
            # link the node with previous ones regarding size deviation
            for node_idx in previous_nodes[len(previous_nodes)-nb_nodes_to_connect:]:
59
60
61
62
63
64
65
66
67
68
69
70
71
                if node_idx is not None:
                    G.add_edge(next_idx, node_idx)

            # Update the state
            previous_nodes.append(next_idx)
            next_idx += 1

        # Remove oldest previous node
        previous_nodes.pop(0)

    return G


Yoann Dufresne's avatar
Yoann Dufresne committed
72
73
74
75
76
77
""" Merge 2 nodes of a graph G.
    The new node have edges from both of the previous nodes (dereplicated).
    If a link between node1 and node2 exist, it's discarded.
    @param G The graph to manipulate
    @param node1 First node to merge
    @param node2 Second node to merge
78
    @return The name of the new node in G
Yoann Dufresne's avatar
Yoann Dufresne committed
79
80
81
82
83
84
"""
def merge_nodes(G, node1, node2):
    # Create the new node
    new_node = f"{node1}_{node2}" if node1 < node2 else f"{node2}_{node1}"
    G.add_node(new_node)

85
86
    # List the edges to add
    neighbors = []
Yoann Dufresne's avatar
Yoann Dufresne committed
87
88
89
90
91
92
    for edge in G.edges(node1):
        neighbor = edge[0] if edge[0] != node1 else edge[1]

        if neighbor == node2:
            continue

93
        neighbors.append(neighbor)
Yoann Dufresne's avatar
Yoann Dufresne committed
94
95
96
97
98
99
100

    for edge in G.edges(node2):
        neighbor = edge[0] if edge[0] != node2 else edge[1]

        if neighbor == node1:
            continue

101
102
103
104
105
106
107
        neighbors.append(neighbor)

    # De-replicate neighbors
    neighbors = set(neighbors)

    # Add neighbors
    for neighbor in neighbors:
Yoann Dufresne's avatar
Yoann Dufresne committed
108
109
110
111
112
113
        G.add_edge(neighbor, new_node)

    # Remove previous nodes from the graph
    G.remove_node(node1)
    G.remove_node(node2)

114
    return new_node
Yoann Dufresne's avatar
Yoann Dufresne committed
115