diff --git a/deconvolution/main/generate_genome_molecule_graph.py b/deconvolution/main/generate_genome_molecule_graph.py index 08aa184c289bdec1bf74f827ae553ddb8eb99463..fa6974321bd25269707e59face5207ba15b8a2f6 100755 --- a/deconvolution/main/generate_genome_molecule_graph.py +++ b/deconvolution/main/generate_genome_molecule_graph.py @@ -19,27 +19,42 @@ def parse_arguments(): return args +def compute_ovl_length(s1,e1,s2,e2): + if s1 > s2: + s1,e1,s2,e2 = s2,e2,s1,e1 + return e1-s2 + +def is_contained(itree,start,end): + for itv in itree.overlap(start,end): + start2, end2 = itv.begin, itv.end + if start2 <= start and end2 >= end: + return True + return False + def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_seed=None): # Reproducibility if rnd_seed != -1: np.random.seed(rnd_seed) G = nx.Graph() + + # generate molecules according to similar principle as LRSM + # but in addition make sure they're not contained in each other molecules = dict() #Â mol index: (start,end) + itree = IntervalTree() for idx in range(nb_molecules): + while True: #Â to avoid contained molecules G.add_node(idx) #pick a starting position (follows LRSIM) - start_pos = np.random.randint(0,genome_size) + start = np.random.randint(0,genome_size) #pick a fragment size (follows LRSIM) molecule_size = np.random.poisson(avg_mol_size) - print("molecule",idx,"start",start_pos,"end",start_pos+molecule_size) - molecules[idx] = (start_pos, start_pos+molecule_size) - - #Â compute overlaps between molecules - itree = IntervalTree() - for idx in molecules: - start,end = molecules[idx] - itree.addi( start,end,idx) + end = start+molecule_size + if is_contained(itree,start,end): continue + print("molecule",idx,"start",start,"end",end) + molecules[idx] = (start, end) + itree.addi(start,end,idx) + break #Â create graph edges corresponding to molecules overlaps for idx in molecules: @@ -47,6 +62,10 @@ def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_see for itv in itree.overlap(start,end): other_idx = itv.data if idx==other_idx: continue + start2, end2 = itv.begin, itv.end + ovl_length = compute_ovl_length(start,end,start2,end2) + #print("overlap length",ovl_length) + if ovl_length < min_mol_ovl: continue G.add_edge(idx, other_idx) return G diff --git a/requirements.txt b/requirements.txt index 3fb587f7a54b9cb19acdca3c1d19c5f06b3edd47..1998447ede4681caae99ce11abff7918d6de17a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ bidict pytest scipy python-louvain +intervaltree