diff --git a/medvedev_repo_scripts/evaluate_barcode_graph.py b/medvedev_repo_scripts/evaluate_barcode_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cb25fa818be3ba5de77208b686914c676276297
--- /dev/null
+++ b/medvedev_repo_scripts/evaluate_barcode_graph.py
@@ -0,0 +1,36 @@
+# compares a barcode graph to ground truth
+# input: [tested.gpickle] [ground_truth.gpickle]
+
+
+import sys
+from collections import Counter
+from networkx import nx
+
+print("loading tested graph")
+tested = nx.read_gpickle(sys.argv[1])
+
+print("loading ground truth graph")
+ground = nx.read_gpickle(sys.argv[2])
+print("graphs loaded")
+
+common_nodes = set(tested.nodes()) & set(ground.nodes())
+
+print(len(common_nodes),"common nodes")
+
+tp,fp,fn = 0,0,0
+
+for edge in tested.edges():
+    if edge in ground.edges():
+        tp += 1
+    else:
+        fp += 1
+
+for edge in ground.edges():
+    if edge not in tested.edges():
+        fn += 1
+
+print("tested edges:",len(tested.edges()))
+print("ground edges:",len(ground.edges()))
+print("TP:",tp)
+print("FP:",fp)
+print("FN:",fn)
diff --git a/medvedev_repo_scripts/ground_truth_barcode_graph.py b/medvedev_repo_scripts/ground_truth_barcode_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa988df65fdbd7fce273d49b08905be6da975556
--- /dev/null
+++ b/medvedev_repo_scripts/ground_truth_barcode_graph.py
@@ -0,0 +1,185 @@
+# given a reads file produced by LRSIM and "longranger basic" and then passed through "lrsim_reinject_mi_in_barcoded_fastq.py" to get MI's
+#
+# construct ground truth barcode graph and molecule graph
+#
+# input: [reads_file.fastq.gz] [output_prefix]
+#
+# example:
+# zcat ~/data/10x-ecoli/fastqs/ecoli.fq.gz | python ground_truth_barcode_graph.py /dev/stdin graph
+
+import sys, gzip
+from Bio.SeqIO.QualityIO import FastqGeneralIterator
+from networkx import nx
+from ncls import NCLS 
+import numpy as np
+from collections import Counter
+
+reads_file = sys.argv[1]
+overlap_size = 9000
+
+if reads_file.endswith('.gz'):
+    opener = gzip.open
+else:
+    opener = open
+
+"""
+ get the molecule intervals for each barcode+MI combination
+"""
+counter =0
+highest_pos = 0
+molecule_extents = dict()
+nb_reads_per_mol = Counter()
+for title, seq, qual in FastqGeneralIterator(opener(reads_file,"rt")):
+    # fastq format, @[read id] [barcode]
+    line = title.strip().split()
+    read = line[0][1:].split('/')[0] #strip @ and '/1'
+    barcode = "NA"
+    mi = "NA"
+    for x in line:
+        if x.startswith('BX'):
+            barcode = x.strip("-1")
+        if x.startswith('MI'):
+            mi = x
+
+    if barcode == "NA" or mi == "NA":
+        continue
+
+    molecule = barcode + "-" + mi  # build a molecule graph actually
+
+    #print(read)
+    # WARNING: here if it crashes, just uncomment one of the two lines and comment the other
+    #chrom, posA, posB = read.split('_')[1:4] #specific to reference names with a "_"
+    chrom, posA, posB = read.split('_')[0:3] #specific to reference names without "_"
+    posA, posB = map(int,[posA,posB])
+    #print(posA,posB,barcode)
+
+    if posA > posB:
+        posA, posB = posB, posA
+    highest_pos = max(highest_pos,posB)
+
+    if molecule in molecule_extents:
+        oldchrom, oldA, oldB = molecule_extents[molecule] 
+    else:
+        oldA, oldB = posA, posB
+        oldchrom = chrom
+    if oldchrom != chrom: print("weird molecule placement",oldchrom,chrom,posA,posB,oldA,oldB)
+    molecule_extents[molecule] = (chrom, min(oldA,posA),max(oldB,posB))
+
+    nb_reads_per_mol[molecule] += 1
+
+    counter += 1
+    if counter % 100000 == 0:
+        sys.stderr.write(".")
+        sys.stderr.flush()
+
+# drop molecules that have <= 3 reads or have coverage < 0.05x (usually peak is supposed to be at 0.2x as per https://www.sciencedirect.com/science/article/pii/S2001037017300855) 
+# assuming 150bp reads
+nb_removed_low_cov_molecules = 0
+for molecule in nb_reads_per_mol:
+    chom, posA, posB = molecule_extents[molecule]
+    assert(posB >= posA)
+    molecule_length = posB-posA
+    if nb_reads_per_mol[molecule] <= 3:
+        del molecule_extents[molecule]
+        continue
+    if nb_reads_per_mol[molecule] * 150 <= 0.05 * molecule_length:
+        nb_removed_low_cov_molecules += 1
+        del molecule_extents[molecule]
+        continue
+if nb_removed_low_cov_molecules > 0:
+    print("removed",nb_removed_low_cov_molecules,"low-coverage molecules")
+
+
+#build the interval tree
+intervals = []
+starts = []
+ends = []
+vals = []
+for molecule in molecule_extents:
+    chrom, posA, posB = molecule_extents[molecule]
+
+    intervals += [(posA,posB,molecule)]
+    #tree.addi(posA,posB,barcode)
+    starts += [posA]
+    ends += [posB]
+    vals += [molecule]
+
+np_starts = np.array(starts, dtype=np.long)
+np_ends = np.array(ends, dtype=np.long)
+np_vals = np.array(range(len(vals)), dtype=np.long) #ncls doesn't associate strings, only integers, so i'm storing indices
+
+tree = NCLS(np_starts,np_ends,np_vals)
+
+print("tree constructed")
+
+
+# compute molecule overlap length
+# assuming they already intersect
+#
+#  a------b
+#     c------d
+#
+# or
+#
+# a-----------b
+#   c-------d
+#
+def overlap(a,b,c,d):
+    if a > c:
+        a,b,c,d = c,d,a,b
+    if c >= a and d <= b: #contained
+        return d-c
+    if c >= a and d >= b:
+        return b-c
+    else:
+        print("dunno how to overlap",a,b,c,d)
+        exit(1)
+
+
+# construct true barcode and molecule graphs
+mG =  nx.Graph()
+bG =  nx.Graph()
+
+from collections import defaultdict
+barcodes_labels = defaultdict(list)
+for molecule in molecule_extents:
+    chrom, posA, posB = molecule_extents[molecule]
+    label = "%s:%d-%d" %(chrom,posA,posB)
+    mG.add_node(molecule, label=molecule+";"+label)
+    barcode = molecule.split("-")[0]
+    barcodes_labels[barcode] += [label]
+
+for barcode in barcodes_labels:
+    #print("label",barcodes_labels[barcode])
+    long_label = ';'.join(barcodes_labels[barcode])
+    bG.add_node(barcode, label=long_label)
+
+for i, molecule in enumerate(molecule_extents):
+    chrom, posA, posB = molecule_extents[molecule]
+    results = tree.find_overlap(posA,posB)
+    #print(results)
+    for otherposA, otherposB, other_molecule in results:
+        other_molecule = vals[other_molecule]
+        otherchrom, otherposA_2, otherposB_2 = molecule_extents[other_molecule]
+        assert(otherposA == otherposA_2 and otherposB == otherposB_2)
+        if chrom == otherchrom and overlap(posA,posB,otherposA,otherposB) > overlap_size:
+            #print(posA,posB,molecule,"-",otherposA, otherposB, other_molecule)
+            mG.add_edge(molecule,other_molecule)
+            barcode      =       molecule.split("-")[0]
+            otherbarcode = other_molecule.split("-")[0]
+            edge_label= '%s:%d-%d %s:%d-%d' %(chrom,posA,posB,otherchrom,otherposA,otherposB)
+            bG.add_edge(barcode,otherbarcode, label=edge_label)
+    if i % 1000 == 1:
+        sys.stderr.write(".")
+        sys.stderr.flush()
+
+print("writing graphs")
+print(len(bG.nodes()),"nodes",len(bG.edges()),"edges for the barcode graph")
+print(len(mG.nodes()),"nodes",len(mG.edges()),"edges for the molecule graph")
+nx.write_graphml(bG, sys.argv[2]+".groundtruth_barcode.graphml") 
+nx.write_graphml(mG, sys.argv[2]+".groundtruth_molecule.graphml") 
+
+nx.write_gpickle(bG, sys.argv[2]+".groundtruth_barcode.gpickle") 
+nx.write_gpickle(mG, sys.argv[2]+".groundtruth_molecule.gpickle") 
+
+print("ground truth constructed")