Commit 3ca10697 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

previous scripts

parent 090d440e
# arguments: graphml file
# attempts to deconvolve a barcode graph using community detection
import sys
import itertools
import operator
from collections import Counter
from networkx import nx
#from networkx.algorithms import community
import community # python-louvain
import sys
filename = sys.argv[1]
G = None
if filename.endswith('.graphml'):
G = nx.read_graphml(filename)
elif filename.endswith('.gexf'):
G = nx.read_gexf(filename)
print(G)
def deconvolve2(G,node):
neighbors = list(G.neighbors(node))
print("node",node,len(neighbors),"neighbors")
G2 = nx.Graph(G.subgraph(neighbors))
# make sure there is a single connected components
while True:
ccs = list(nx.connected_components(G2))
if len(ccs) == 1:
break
# else artificially and weakly connect components to run fluid community
n1 = list(ccs[0])[0]
n2 = list(ccs[1])[0]
G2.add_edge(n1,n2)
#cliques = list(community.asyn_fluidc(G2,2))
#cliques = list(community.girvan_newman(G2)) # too slow
#cliques = list(community.label_propagation_communities(G2)) # not very accurate
#cliques = list(community.asyn_lpa_communities(G2)) # seems accurate but hands
cliques = community.best_partition(G2)
print([len([c for c,i in cliques.items() if i == clique_id]) for clique_id in set(cliques.values())])
if len(cliques) == 1:
return # nothing to deconvolve here
for node2,clique_id in cliques.items():
G.add_node(node+"-MI%d" % clique_id)
G.add_edge(node+"-MI%d" % clique_id, node2, contigs=(G[node][node2]['contigs'] if 'contigs' in G[node][node2] else ""))
# for debugging
G2.nodes[node2]['mi'] = clique_id
debug = True
if debug:
nx.write_graphml(G2,"test.graphml")
#exit(0)
G.remove_node(node)
g_nodes = list(G.nodes())
for node in g_nodes:
deconvolve2(G,node)
nx.write_graphml(G,sys.argv[1]+".deconvolved.graphml")
\ No newline at end of file
import networkx as nx
G = nx.Graph()
labels = list(range(30))
# create nodes
names = {}
for lab in labels:
G.add_node(lab)
names[lab] = lab
nx.set_node_attributes(G, names, "test")
# insert duplications
labels.insert(23, 7)
print(labels)
# create links
for i, lab in enumerate(labels):
for j in range(i+1, min(i+4, len(labels))):
G.add_edge(lab,labels[j])
nx.write_graphml(G, "simple_duplicated_3links.graphml")
#!/usr/bin/env python3
import networkx as nx
import sys
G = nx.read_graphml (sys.argv[1])
# label molecule nodes
labels = {}
for idx, node in enumerate(G.nodes()):
labels[node] = str(idx)
# artificially make barcodes
barcodes = []
n = len(G.nodes())
available_molecules = set(G.nodes())
m = 5 # m molecules per barcode
# Group molecules by barcode
import random
while len(available_molecules) > 0:
barcode = set(random.sample(available_molecules, m))
available_molecules -= barcode
# print(barcode)
barcodes += [barcode]
# Associate molecule to barcode
molecule_barcode = dict()
for barcode_index, barcode in enumerate(barcodes):
for mol in barcode:
molecule_barcode[mol] = barcode_index
print(molecule_barcode)
# Generate barcoded graph nodes
G2 = nx.Graph()
g2_labels = {}
for barcode_index, barcode_molecules in enumerate(barcodes):
bar_names = "_".join(barcode_molecules)
g2_labels[barcode_index] = f"{barcode_index}:{bar_names}"
G2.add_node(g2_labels[barcode_index])
# Generate barcoded graph edges
for mol_edge in G.edges():
m1, m2 = mol_edge
b1, b2 = g2_labels[molecule_barcode[m1]], g2_labels[molecule_barcode[m2]]
G2.add_edge(b1,b2)
# print(G2.edges)
output = sys.argv[1].replace("molecule", "barcodes").replace(".graphml", f"_{m}.gexf")
nx.write_gexf(G2, output)
#!/usr/bin/env python3
# generate a bunch of molecules, in the form of python intervals (a,b), i.e. a<=i<b. here, b-a=10
# then find their overlaps >= 1 bases
n = 16
o = 5
molecules_intervals = []
for i in range(n):
molecules_intervals += [(i,i+o)]
#print(molecules_intervals)
def overlap(a,b):
for i in range(*a):
if i in range(*b):
return True
return False
#print(overlap(molecules_intervals[0],molecules_intervals[2]))
#print(overlap(molecules_intervals[0],molecules_intervals[20]))
import networkx as nx
G = nx.Graph()
for i,a in enumerate(molecules_intervals):
G.add_node(i)
for i,a in enumerate(molecules_intervals):
for j,b in enumerate(molecules_intervals):
if i >= j: continue
if overlap(a,b):
G.add_edge(i,j)
print(G.edges())
nx.write_graphml(G, f"data/simulated_molecules_{n}_{o}.graphml")
#!/usr/bin/env python3
from networkx import nx
from copy import deepcopy
def compute_network_distances(graph, max_dist=3):
distances = {}
# Init all the distances
g_nodes = list(graph.nodes())
for node in g_nodes:
distances[node] = {node:0}
# Transmit distances over max_dist iterations
for _ in range(max_dist):
old_distances = deepcopy(distances)
# For each node, transmit list of distances
for node in g_nodes:
neighbors = list(graph.neighbors(node))
# for each neighbor of the current node, transmit all the distances already present.
for neighbor in neighbors:
for key, val in old_distances[node].items():
if not key in distances[neighbor]:
distances[neighbor][key] = val + 1
return distances
def print_distances(distances):
for key, val in distances.items():
print(key, len(val))
def evolved_distances(graph, node):
prev_distances = compute_network_distances(graph, 3)
# Remove node
neighbors = list(graph.neighbors(node))
graph.remove_node(node)
next_distances = compute_network_distances(graph, 3)
# Add removed node
graph.add_node(node)
for neighbor in neighbors:
graph.add_edge(node, neighbor)
# print distance evolutions between x and y nodes (different from node)
for origin, destinations in prev_distances.items():
if origin == node:
continue
for destination, distance in destinations.items():
if destination == node:
continue
if not destination in next_distances[origin]:
print(f"{origin}--{destination} {distance}->X")
elif next_distances[origin][destination] != distance:
print(f"{origin}--{destination} {distance}->{next_distances[origin][destination]}")
# if not dest in next_distances:
# print(f"{node}--{dest} {dist}->X")
# elif next_distances[dest] == dist:
# print(f"{node}--{dest} {dist}->{next_distances[dest]}")
graph = nx.read_graphml("simple_duplicated_3links.graphml")
# graph = nx.read_graphml("simulated_barcodes.graphml")
nodes = list(graph.nodes())
# nodes = ["7"]
for node in nodes:
print(f"node {node}")
evolved_distances(graph, node)
# print_distances(distances)
import networkx as nx
g = nx.read_graphml("simple_duplicated_3links.graphml")
# List all the nodes for adjacency tests.
for n, nbrs in g.adj.items():
neighbors = [x[0] for x in nbrs.items()]
to_split = False
# For all nodes B and C neighbors of A
for idx in range(len(neighbors)):
for jdx in range(idx+1, len(neighbors)):
ni = neighbors[idx]
nj = neighbors[jdx]
# Need to plit A in A' and A'' if B and C are not neighbors
if not ni in g.adj[nj]:
# print(f"{ni},{nj}")
to_split = True
# Split or not split, that is the question
print(f"{n} split ? {to_split}")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment