Commit 32eb0e52 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

add config values for snakefile data generation and describe them in the readme

parents 288449cc be448d6c
......@@ -13,6 +13,15 @@ To know how to use it please use the -h option.
* generate_fake_barcode_graph.py: Takes a barcode graph as input (gexf formatted) and outputs a barcode graph. The barcode graph is created by merging nodes from the molecule graph.
* use the snakefile "Snakemake_data_simu".
Every parameter can be an integer or a list of integers.
Each combination of parameters generates one barcode graph.
Config parameters:
* n: the number of initial molecules
* m: average number of nodes merged into each barcode
* d: average coverage of a molecule in the initial graph
* workdir: the directory to create and use as output
### Data structures and algorithms
* to_d2_graph.py: Mount a barcode graph into memory and create a d2 graph from it.
......
# Simulation parameters. Each value is read from the Snakemake `config`
# mapping when the key is present and falls back to the default otherwise.
WORKDIR = config.get("workdir", "snake_exec")  # Directory used for outputs
N = config.get("n", [1000])  # Number of molecules to simulate
D = config.get("d", [5])  # Average coverage of each molecule
M = config.get("m", [2])  # Average number of molecules per barcode
# Default target: request one simulated barcode graph for every combination
# of the n, m and d values.
rule all:
    input:
        expand(f"{WORKDIR}/simu_bar_n{{n}}_d{{d}}_m{{m}}.gexf", n=N, m=M, d=D)
rule generate_barcodes:
input:
......
......@@ -59,7 +59,7 @@ class D2Graph(nx.Graph):
import debug_disct as dd
# Compute all the d-graphs
if verbose:
print("Compute the unit d-graphs")
print("Computing the unit d-graphs..")
self.d_graphs_per_node = compute_all_max_d_graphs(self.barcode_graph, debug=debug)
if verbose:
counts = sum(len(x) for x in self.d_graphs_per_node.values())
......
......@@ -244,6 +244,8 @@ def compute_all_max_d_graphs(graph, debug=False):
cliques = list(nx.find_cliques(neighbors_graph))
print("cliques", len(cliques))
if debug: print("node",node,"has",len(cliques),"cliques")
# Pair halves to create d-graphes
for idx, clq1 in enumerate(cliques):
for clq2_idx in range(idx+1, len(cliques)):
......
......@@ -10,25 +10,41 @@ import d2_graph as d2
def parse_arguments():
    """Parse the command line of the barcode-graph to d2-graph converter.

    Returns:
        The argparse namespace with three attributes:
        ``barcode_graph`` (positional path of the input graph),
        ``output_prefix`` (``-o``, defaults to ``"d2_graph"``) and
        ``debug`` (``-d`` flag, defaults to ``False``).
    """
    parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.')
    parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gexf formatted file.')
    # Register the option exactly once: argparse raises a conflict error
    # when the same option strings are added twice.
    parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.")
    parser.add_argument('--debug', '-d', action='store_true', help="Debug")

    return parser.parse_args()
def main():
    """Load a barcode graph, build the d2 graph from it and save the result.

    The input format is chosen from the file extension (``.gexf`` or
    ``.graphml``); any other extension prints an error on stderr and exits
    with status 1. The result is saved as ``<output_prefix>.tsv`` through
    ``D2Graph.save`` and as ``<output_prefix>.gexf`` through networkx.
    """
    from datetime import datetime

    # Parsing the command line
    args = parse_arguments()
    debug = args.debug
    filename = args.barcode_graph

    def dprint(s):
        """Print a timestamped message, only when the debug flag is set."""
        if debug:
            t = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(t, "[debug]", s)

    # Load the graph exactly once, with the reader matching the extension.
    dprint("loading barcode graph")
    if filename.endswith('.gexf'):
        G = nx.read_gexf(filename)
    elif filename.endswith('.graphml'):
        G = nx.read_graphml(filename)
    else:
        print("Input file must be gexf or graphml formatted", file=sys.stderr)
        sys.exit(1)
    dprint("barcode graph loaded")

    dprint("creating D2graph object")
    d2g = d2.D2Graph(G)
    dprint("D2 graph object created")

    # Index size must be changed for general purpose. 8 is good for d=5
    dprint("constructing d2 graph from barcode graph")
    d2g.construct_from_barcodes(index_size=8, debug=debug)
    # dprint already adds the "[debug]" tag, so the message carries none.
    dprint("d2 graph constructed")

    d2g.save(f"{args.output_prefix}.tsv")
    nx.write_gexf(d2g, f"{args.output_prefix}.gexf")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment