Commit 71d73af0 authored by Yoann Dufresne
Browse files

Safe script for merging a molecule graph into a uniform barcode graph

parent a96e76f7
#!/usr/bin/env python3
import networkx as nx
import sys
import random
import argparse
import graph_manipulator as gm
# Load the molecule graph whose path is the first command-line argument.
G = nx.read_graphml(sys.argv[1])

# Label every molecule node with its position (as a string) in the node order.
labels = {node: str(idx) for idx, node in enumerate(G.nodes())}
def parse_arguments():
    """ Parse the command line of the script.
        @return The argparse namespace, with the attributes merging_depth (int, required),
        input_graph (required), output (None when absent) and random_seed (int, None when absent)
    """
    parser = argparse.ArgumentParser(description='Transform a 10X molecule graph into a 10X barcode graph.')
    parser.add_argument('--merging_depth', '-m', type=int, required=True, help='Number of nodes to merge together')
    parser.add_argument('--input_graph', '-i', required=True, help='A 10X molecule graph gexf formated.')
    parser.add_argument('--output', '-o', help="Output filename")
    parser.add_argument('--random_seed', '-s', type=int, help="If you want to fix the random seed for reproducibility")

    # Only the parsed namespace leaves this function: no graph is read here.
    return parser.parse_args()
# Group molecules by barcode: repeatedly draw m molecules at random from the
# remaining pool, remove them from the pool and store them as one barcode,
# until the pool is empty.
# NOTE(review): `available_molecules`, `m`, `barcodes` and `G` are not bound in
# this span; they must already exist when it runs — confirm where they are set.
import random
while len(available_molecules) > 0:
# NOTE(review): random.sample raises ValueError when fewer than m molecules
# remain (pool size not a multiple of m), and it rejects a set as population
# on Python >= 3.11 — confirm the inputs and the target Python version.
barcode = set(random.sample(available_molecules, m))
available_molecules -= barcode
# print(barcode)
barcodes += [barcode]
# Associate molecule to barcode: molecule name -> index of its barcode in `barcodes`
molecule_barcode = dict()
for barcode_index, barcode in enumerate(barcodes):
for mol in barcode:
molecule_barcode[mol] = barcode_index
# Generate barcoded graph nodes: one label "<barcode index>:<molecule names joined by '_'>"
# per barcode.
# NOTE(review): the labels are only stored in g2_labels; no node is added to G2
# in the visible code — confirm whether an add_node call is missing.
G2 = nx.Graph()
g2_labels = {}
for barcode_index, barcode_molecules in enumerate(barcodes):
# str.join needs string items, so molecule names are presumably strings;
# the barcode is a set, so the order of the joined names is not fixed.
bar_names = "_".join(barcode_molecules)
g2_labels[barcode_index] = f"{barcode_index}:{bar_names}"
# Generate barcoded graph edges: translate both ends of every molecule edge
# into the label of the barcode that contains the molecule.
# NOTE(review): b1 and b2 are computed but never used in the visible code, so
# G2 has no edge when it is written — confirm whether an add_edge call is missing.
for mol_edge in G.edges():
m1, m2 = mol_edge
b1, b2 = g2_labels[molecule_barcode[m1]], g2_labels[molecule_barcode[m2]]
# print(G2.edges)
# Output path derived from the input path: "molecule" becomes "barcode" and the
# ".graphml" extension becomes "_<m>.gexf"; G2 is then written in GEXF format.
output = sys.argv[1].replace("molecule", "barcode").replace(".graphml", f"_{m}.gexf")
nx.write_gexf(G2, output)
def fusion_graph(G, merging_depth):
    """ Take a molecule d-graph chain and merge the nodes uniformly to obtain a barcode graph.
        The nodes are taken in the iteration order of G.nodes() and cut into consecutive
        chunks of merging_depth nodes (the last chunk may be shorter). Each chunk is collapsed
        into a single node with gm.merge_nodes, then relabelled "<chunk index>:<merged name>".
        @param G A molecule graph. gm.merge_nodes is expected to update it in place.
        @param merging_depth The number of nodes to merge from the original graph to obtain one node of the barcode graph. Must be at least 1.
        @return The merged barcode graph, as returned by nx.relabel_nodes
        @raise ValueError If merging_depth is lower than 1
    """
    # A zero step makes range() fail and a negative one silently merges nothing:
    # reject both with an explicit message.
    if merging_depth < 1:
        raise ValueError(f"merging_depth must be at least 1, got {merging_depth}")

    # Snapshot of the node names: the graph changes while nodes are merged.
    nodes = list(G.nodes())
    bijective_labels = {}

    for label, start in enumerate(range(0, len(nodes), merging_depth)):
        # Extract the values to merge
        chunk = nodes[start:start + merging_depth]

        # Merge the nodes one by one; merge_nodes gives back the name of the merged node
        merged = chunk[0]
        for node in chunk[1:]:
            merged = gm.merge_nodes(G, merged, node)

        # Label the node
        bijective_labels[merged] = f"{label}:{merged}"

    # Relabel all the nodes
    return nx.relabel_nodes(G, bijective_labels)
def save_graph(G, outfile):
    """ Write a graph on disk in the GEXF format.
        @param G The graph to save
        @param outfile The path of the file to write
    """
    nx.write_gexf(G, outfile)
if __name__ == "__main__":
    args = parse_arguments()

    # Seed the generator only when a seed was given on the command line.
    # The test is against None so that a seed of 0 is honoured too.
    if args.random_seed is not None:
        random.seed(args.random_seed)

    # Read the molecule graph and merge its nodes into barcode nodes
    G = nx.read_gexf(args.input_graph)
    G = fusion_graph(G, args.merging_depth)

    # Default output name, unless one was given with -o/--output
    outfile = f"simulated_barcodes_{args.merging_depth}.gexf"
    if args.output:
        outfile = args.output

    save_graph(G, outfile)
......@@ -31,6 +31,6 @@ if __name__ == "__main__":
outfile = f"simulated_molecules_{args.num_molecule}_{args.depth}.gexf"
if args.output:
outfile = args.outfile
outfile = args.output
save_graph(G, outfile)
......@@ -27,7 +27,7 @@ def generate_d_graph_chain(size, d):
@param G The graph to manipulate
@param node1 First node to merge
@param node2 Second node to merge
@return The modified graph G
@return The name of the new node in G
def merge_nodes(G, node1, node2):
# Create the new node
......@@ -63,5 +63,5 @@ def merge_nodes(G, node1, node2):
return G
return new_node
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment