Commit d0b9fb62 authored by Yoann Dufresne
Browse files

molecule generation variations

parent fedc3560
......@@ -10,7 +10,8 @@ def parse_arguments():
parser.add_argument('--num_molecule', '-n', type=int, required=True, help='The number of molecule in the final graph')
parser.add_argument('--max_depth', '-md', type=int, default=-1, help='The max number of molecule linked on each direction of the chain. if not specified, ceil(avg_depth).')
parser.add_argument('--avg_depth', '-ad', type=int, required=True, help='The average number of molecule linked on each direction of the chain.')
parser.add_argument('--rnd_seed', '-s', type=int, default=None, help='Random seed. Used for reproducibility purpose. Please do not use it in production.')
parser.add_argument('--size_reduction', '-r', type=float, default=0.0, help='99%% of molecule size will be between average_size*size_reduction')
parser.add_argument('--rnd_seed', '-s', type=int, default=-1, help='Random seed. Used for reproducibility purpose. Please do not use it in production.')
parser.add_argument('--output', '-o', help="Output filename")
args = parser.parse_args()
......@@ -21,8 +22,8 @@ def parse_arguments():
return args
def generate_graph(n, d_max, d_avg, rnd_seed=None):
return gm.generate_approx_d_graph_chain(n, d_max, d_avg, rnd_seed)
def generate_graph(n, d_max, d_avg, size_reduction, rnd_seed=None):
return gm.generate_approx_d_graph_chain(n, d_max, d_avg, size_reduction, rnd_seed)
# return gm.generate_d_graph_chain(n, d_max)
......@@ -33,7 +34,7 @@ def save_graph(G, outfile):
if __name__ == "__main__":
args = parse_arguments()
G = generate_graph(args.num_molecule, args.max_depth, args.avg_depth, args.rnd_seed)
G = generate_graph(args.num_molecule, args.max_depth, args.avg_depth, args.size_reduction, args.rnd_seed)
outfile = f"simulated_molecules_{args.num_molecule}_{args.avg_depth}.gexf"
if args.output:
......
......@@ -21,17 +21,18 @@ def generate_d_graph_chain(size, d):
return G
def generate_approx_d_graph_chain(size, d_max, d_avg, rnd_seed=None):
def generate_approx_d_graph_chain(size, d_max, d_avg, size_reduction=0, rnd_seed=-1):
""" Generate an almost d-graph chain (succession of unit d-graphs). Almost because they are d-graphs in average
with a coverage variation.
:param size The number of nodes in the chain (should not be less than 2*d+1)
:param d_max The max number of connection on the left and on the right for any node
:param d_avg The average d value in the graph (ie 2*d average coverage)
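:param size_reduction Randomly vary the number of previous nodes each new node is linked to. The number is drawn from a normal distribution centred on d_max, so that about 99% of the draws are at least d_max*(1-size_reduction); draws are then clamped to the range [1, 2*d_max]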
:param rnd_seed Fix the random seed for reproducibility
:return The d-graph chain
"""
# Reproducibility
if rnd_seed:
if rnd_seed != -1:
random.seed(rnd_seed)
# Sample size computation
......@@ -42,9 +43,13 @@ def generate_approx_d_graph_chain(size, d_max, d_avg, rnd_seed=None):
to_skip = random.sample(range(total_size), sursample_needed)
to_skip.sort()
# Init the random size variation
d_min = d_max*(1-size_reduction)
std_dev = (d_max-d_min)/2.5
# Generate sequence
G = nx.Graph()
previous_nodes = [None]* d_max
previous_nodes = [None]* d_max * 2
next_idx = 0
for idx in range(total_size):
if len(to_skip) > 0 and to_skip[0] == idx:
......@@ -54,8 +59,13 @@ def generate_approx_d_graph_chain(size, d_max, d_avg, rnd_seed=None):
# Create the node
G.add_node(next_idx)
# link the node with previous ones
for node_idx in previous_nodes:
# size deviation computation
nb_nodes_to_connect = round(random.gauss(d_max, std_dev))
# Limit the deviation
nb_nodes_to_connect = max(nb_nodes_to_connect, 1)
nb_nodes_to_connect = min(nb_nodes_to_connect, 2 * d_max)
# link the node with previous ones regarding size deviation
for node_idx in previous_nodes[len(previous_nodes)-nb_nodes_to_connect:]:
if node_idx is not None:
G.add_edge(next_idx, node_idx)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment