Commit b79c63d1 authored by rchikhi's avatar rchikhi
barcode graph generation from genomes tuning

parent 56839d4c
......@@ -19,27 +19,42 @@ def parse_arguments():
return args
def compute_ovl_length(s1, e1, s2, e2):
    """Return the length of the overlap between intervals (s1, e1) and (s2, e2).

    The overlap runs from the later of the two starts to the earlier of the
    two ends.  Taking the earlier end matters when one interval is nested
    inside the other: the previous ``e1 - s2`` formula reported the distance
    up to the end of the *outer* interval and therefore over-estimated the
    overlap in that case.

    The result is zero or negative when the intervals do not overlap (its
    magnitude is then the gap between them), so callers can keep comparing
    it against a minimum-overlap threshold.
    """
    return min(e1, e2) - max(s1, s2)
def is_contained(itree, start, end):
    """Report whether the span start..end is fully covered by a stored interval.

    Every interval yielded by ``itree.overlap(start, end)`` is inspected; the
    answer is True as soon as one of them begins at or before ``start`` and
    ends at or after ``end``, and False when none does (including when the
    query yields nothing).
    """
    return any(
        hit.begin <= start and hit.end >= end
        for hit in itree.overlap(start, end)
    )
def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_seed=None):
# Draws nb_molecules random (start, end) spans on a genome of genome_size
# positions and returns a graph G with an edge between two molecule indices
# whenever compute_ovl_length reports at least min_mol_ovl for their spans.
# NOTE(review): this listing looks like removed and added diff lines shown
# together (both start_pos and start are drawn, itree is created twice) --
# confirm statement order against the actual file before relying on it.
# Reproducibility
# NOTE(review): the statement guarded by this check is not visible here;
# presumably it seeds numpy's RNG with rnd_seed -- confirm. Note that the
# default rnd_seed=None also satisfies "!= -1".
if rnd_seed != -1:
G = nx.Graph()
# generate molecules according to similar principle as LRSIM
# but in addition make sure they're not contained in each other
molecules = dict() # mol index: (start,end)
itree = IntervalTree()
for idx in range(nb_molecules):
while True: # to avoid contained molecules
#pick a starting position (follows LRSIM)
start_pos = np.random.randint(0,genome_size)
start = np.random.randint(0,genome_size)
#pick a fragment size (follows LRSIM)
# the size is Poisson-distributed with mean avg_mol_size
molecule_size = np.random.poisson(avg_mol_size)
molecules[idx] = (start_pos, start_pos+molecule_size)
# compute overlaps between molecules
itree = IntervalTree()
for idx in molecules:
start,end = molecules[idx]
# store the molecule index as the interval's payload
itree.addi( start,end,idx)
end = start+molecule_size
# redraw when an interval already in itree fully covers start..end
if is_contained(itree,start,end): continue
molecules[idx] = (start, end)
# create graph edges corresponding to molecules overlaps
for idx in molecules:
# NOTE(review): lines are elided at the hunk marker below; where start/end
# are rebound for each idx is not visible in this view.
......@@ -47,6 +62,10 @@ def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_see
for itv in itree.overlap(start,end):
# NOTE(review): the right-hand side is missing in this view; presumably the
# index stored with the interval (third argument of itree.addi) -- confirm.
other_idx =
# skip the molecule's own interval
if idx==other_idx: continue
start2, end2 = itv.begin, itv.end
ovl_length = compute_ovl_length(start,end,start2,end2)
#print("overlap length",ovl_length)
# overlaps shorter than min_mol_ovl do not produce an edge
if ovl_length < min_mol_ovl: continue
G.add_edge(idx, other_idx)
return G
