Skip to content
Snippets Groups Projects
Commit b79c63d1 authored by rchikhi's avatar rchikhi
Browse files

barcode graphe generation from genomes tuning

parent 56839d4c
No related branches found
No related tags found
No related merge requests found
......@@ -19,27 +19,42 @@ def parse_arguments():
return args
def compute_ovl_length(s1,e1,s2,e2):
if s1 > s2:
s1,e1,s2,e2 = s2,e2,s1,e1
return e1-s2
def is_contained(itree,start,end):
for itv in itree.overlap(start,end):
start2, end2 = itv.begin, itv.end
if start2 <= start and end2 >= end:
return True
return False
def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_seed=None):
# Reproducibility
if rnd_seed != -1:
np.random.seed(rnd_seed)
G = nx.Graph()
# generate molecules according to similar principle as LRSM
# but in addition make sure they're not contained in each other
molecules = dict() # mol index: (start,end)
itree = IntervalTree()
for idx in range(nb_molecules):
while True: # to avoid contained molecules
G.add_node(idx)
#pick a starting position (follows LRSIM)
start_pos = np.random.randint(0,genome_size)
start = np.random.randint(0,genome_size)
#pick a fragment size (follows LRSIM)
molecule_size = np.random.poisson(avg_mol_size)
print("molecule",idx,"start",start_pos,"end",start_pos+molecule_size)
molecules[idx] = (start_pos, start_pos+molecule_size)
# compute overlaps between molecules
itree = IntervalTree()
for idx in molecules:
start,end = molecules[idx]
itree.addi( start,end,idx)
end = start+molecule_size
if is_contained(itree,start,end): continue
print("molecule",idx,"start",start,"end",end)
molecules[idx] = (start, end)
itree.addi(start,end,idx)
break
# create graph edges corresponding to molecules overlaps
for idx in molecules:
......@@ -47,6 +62,10 @@ def generate_graph(nb_molecules, genome_size, avg_mol_size, min_mol_ovl, rnd_see
for itv in itree.overlap(start,end):
other_idx = itv.data
if idx==other_idx: continue
start2, end2 = itv.begin, itv.end
ovl_length = compute_ovl_length(start,end,start2,end2)
#print("overlap length",ovl_length)
if ovl_length < min_mol_ovl: continue
G.add_edge(idx, other_idx)
return G
......
......@@ -4,3 +4,4 @@ bidict
pytest
scipy
python-louvain
intervaltree
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment