# Optional 4th command-line argument: limits the number of overlaps we record
# per molecule (keeping only the longest ones). None means "no limit".
max_overlaps = None
if len(sys.argv) > 4:
    # int() raises ValueError when the argument is not an integer literal.
    max_overlaps = int(sys.argv[4])
    print("using custom maximal number of overlaps per molecule %d" % max_overlaps)

# Optional 5th command-line argument: among the neighbors of a molecule, force
# as many molecules before as after it (i.e. prevents a molecule from only
# overlapping with those after itself) - useful in conjunction with
# max_overlaps. Only the presence of the argument matters; its value is never
# read.
equi = False
if len(sys.argv) > 5:
    equi = True
    print("asking for equi before/after overlap repartition")
# Input whose name ends in ".gz" is read through gzip.open.
if reads_file.endswith('.gz'):
    opener = gzip.open
...
...
@@ -48,8 +62,8 @@ for title, seq, qual in FastqGeneralIterator(opener(reads_file,"rt")):
#print(read)
# WARNING: exactly ONE of the two unpacking lines below must be active.
# The read name is split on "_" and three consecutive fields are taken as
# (chrom, posA, posB). If unpacking or the int() conversion fails, swap which
# of the two lines is commented out.
chrom, posA, posB = read.split('_')[1:4]  # specific to reference names with a "_"
#chrom, posA, posB = read.split('_')[0:3]  # specific to reference names without "_"
# Coordinates arrive as text; int() raises ValueError on a non-numeric field.
posA, posB = int(posA), int(posB)
#print(posA,posB,barcode)
...
...
@@ -154,6 +168,7 @@ for barcode in barcodes_labels:
# Join every label stored for this barcode into one ';'-separated string.
long_label=';'.join(barcodes_labels[barcode])
# Add the barcode to bG as a node carrying the joined label as its "label" attribute.
bG.add_node(barcode,label=long_label)
# Mapping whose missing keys start as an empty list.
# NOTE(review): presumably filled by the molecule loop that follows - confirm.
overlaps=defaultdict(list)
# For each molecule, look up its recorded extent and query `tree` for whatever
# overlaps the [posA, posB] range.
for i, molecule in enumerate(molecule_extents):
    chrom, posA, posB = molecule_extents[molecule]
    results = tree.find_overlap(posA, posB)
...
...
@@ -161,18 +176,43 @@ for i, molecule in enumerate(molecule_extents):