import sys
# Accession prefix that marks the start of every simulated molecule name.
REFERENCE_PREFIX = "NC_000913"


def rename_nodes(line):
    """Return *line* with each simulated molecule name replaced by its start.

    Every occurrence of a name such as

        xxxxNC_000913.3_100644_115643_0:0:0_0:0:0_14f1/1xxxx

    is rewritten to

        xxxx100644xxxx

    i.e. the whole name is replaced by the second '_'-separated field that
    follows the accession prefix (the start coordinate of the molecule).
    The name is considered to end one character after its first '/' (the
    read-number suffix, "/1" in the example above).

    Text outside the names is left untouched, and a line without any name is
    returned unchanged.  If a name contains no '/', everything up to the next
    name (or the end of the line) is treated as part of the name.

    Raises:
        ValueError: if the text following an accession prefix contains no
            '_' separator, so no start coordinate can be extracted.
    """
    while REFERENCE_PREFIX in line:
        # Text before the first name is kept as-is.
        before, _, tail = line.partition(REFERENCE_PREFIX)
        # 'cur' is the remainder of the first name plus whatever follows it,
        # up to the next name; 'after' (next prefix included) is kept so the
        # following iterations can process the remaining names.
        cur, next_prefix, remainder = tail.partition(REFERENCE_PREFIX)
        after = next_prefix + remainder

        # cur looks like ".3_100644_115643_0:0:0_0:0:0_14f1/1....":
        # field 0 is the accession version, field 1 the start coordinate.
        fields = cur.split('_', 2)
        if len(fields) < 2:
            raise ValueError(
                "malformed molecule name after %s: %r" % (REFERENCE_PREFIX, cur)
            )
        start = fields[1]
        name_tail = fields[2] if len(fields) > 2 else ""

        # Skip up to and including the first '/', then drop the single
        # character that follows it; what is left no longer belongs to
        # the name.
        rest = name_tail.partition('/')[2][1:]

        line = before + start + rest + after
    return line


def main(argv=None):
    """Rewrite the input file into the output file, renaming molecule nodes.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the input path and
    ``argv[2]`` the output path.  Exits with a usage message when fewer than
    two paths are given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("renames nodes of a simulated molecule graph into consecutive integers (for")
        sys.exit("usage: input output")

    # Open the input first so a missing input file does not truncate an
    # existing output file; both handles are closed on exit from the block.
    with open(argv[1]) as infile, open(argv[2], 'w') as out:
        for line in infile:
            out.write(rename_nodes(line))


if __name__ == "__main__":
    main()
