diff --git a/Manifest.toml b/Manifest.toml
index 84c6a4d2fb0bf89d829568da8b8273fbc65f14d6..41892f5aeb9a9a23c6b2761170d5bf15296e0d3d 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -1,6 +1,6 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.9.2"
+julia_version = "1.9.3"
 manifest_format = "2.0"
 project_hash = "2c20afabe03d014276e9478d0fdccbc2cdd634c1"
 
@@ -312,11 +312,11 @@ version = "1.9.2"
 
 [[deps.PlanarLarvae]]
 deps = ["DelimitedFiles", "HDF5", "JSON3", "LinearAlgebra", "MAT", "Meshes", "OrderedCollections", "Random", "SHA", "StaticArrays", "Statistics", "StatsBase", "StructTypes"]
-git-tree-sha1 = "25dede7c9e34786f3c9a576fc2da3c3448c12d80"
+git-tree-sha1 = "6b2dc28d56bcef101672cbf2bb784bbd5d88d579"
 repo-rev = "main"
-repo-url = "https://gitlab.pasteur.fr/nyx/planarlarvae.jl"
+repo-url = "https://gitlab.pasteur.fr/nyx/PlanarLarvae.jl"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
-version = "0.14.0"
+version = "0.15.0"
 
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
diff --git a/Project.toml b/Project.toml
index 4731bc26eb3551067eaacf1e1b690ae5c67f448a..b4870abebb8cb3a3277536a52d2b1e1e43c87872 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TaggingBackends"
 uuid = "e551f703-3b82-4335-b341-d497b48d519b"
 authors = ["François Laurent", "Institut Pasteur"]
-version = "0.15.3"
+version = "0.16"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/pyproject.toml b/pyproject.toml
index 3a7f66ef70140ee3e975ff4f210efee5f54b1815..7a6d08727061a101ce40d9717c880c7dc65083be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "TaggingBackends"
-version = "0.15.3"
+version = "0.16"
 description = "Backbone for LarvaTagger.jl tagging backends"
 authors = ["François Laurent"]
 
diff --git a/scripts/confusion.py b/scripts/confusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8acf91ae47a7bd3e47d87eae226a7875d23504d
--- /dev/null
+++ b/scripts/confusion.py
@@ -0,0 +1,86 @@
+import os
+from glob import glob
+import numpy as np
+from sklearn.metrics import confusion_matrix
+from taggingbackends.data.labels import Labels
+from taggingbackends.data.dataset import LarvaDataset
+from taggingbackends.explorer import BackendExplorer
+
+def index(labels, tags):
+    """
+    Map the true label(s) at a time step to an index into `labels`.
+
+    `tags` is a single label (str) or a collection of tags; for a collection,
+    the first element of `labels` found in `tags` wins. Returns -1 for 'edited'
+    or if no label matches; raises ValueError if a str tag is not in `labels`.
+    """
+    if isinstance(tags, str):
+        # 'edited' alone is probably a (manual) tagging mistake
+        return -1 if tags == 'edited' else labels.index(tags)
+    for i, label in enumerate(labels):
+        if label in tags:
+            return i
+    print("Incompatible labels")
+    print("  expected labels:")
+    print(labels)
+    print("  labels at a time step:")
+    print(tags)
+    return -1
+
+# Per-assay confusion matrices (saved as confusion.csv) and overall F1-scores
+uid, gid = os.getenv('HOST_UID', None), os.getenv('HOST_GID', None)
+if uid is not None:
+    uid, gid = int(uid), int(gid)
+
+labels = None
+cm = None  # confusion matrix summed over all the assays
+
+fn_true = 'groundtruth.label'
+fn_pred = 'predicted.label'
+
+for assay, _, files in os.walk('/data'):
+    if fn_true in files and fn_pred in files:
+        expected = Labels(os.path.join(assay, fn_true))
+        predicted = Labels(os.path.join(assay, fn_pred))
+
+        if labels is None:
+            labels = predicted.labelspec
+        else:
+            assert labels == predicted.labelspec
+
+        cm_ = None  # confusion matrix for the current assay
+        for larva in expected:
+            # note: not all the larvae in `expected` may be in `predicted`
+            y_pred = np.array([labels.index(label) for label in predicted[larva].values()])
+            y_true = np.array([index(labels, tags) for tags in expected[larva].values()])
+            ok = 0 <= y_true  # drop time steps `index` could not map (-1)
+            cm__ = confusion_matrix(y_true[ok], y_pred[ok], labels=range(len(labels)))
+            cm_ = cm__ if cm_ is None else cm_ + cm__
+
+        assert cm_ is not None
+        cm = cm_ if cm is None else cm + cm_  # once per assay, not once per larva
+
+        path = os.path.join(assay, 'confusion.csv')
+        with open(path, 'w') as f:
+            f.write(",".join(labels))
+            for row in cm_:
+                f.write("\n" + ",".join([str(count) for count in row]))
+
+        if uid is not None:
+            os.chown(path, uid, gid)
+
+assert cm is not None
+print('labels:')
+print(labels)
+print('confusion matrix:')
+print(cm)
+
+precision = np.diag(cm) / cm.sum(axis=0)  # column sums: counts per predicted class
+recall = np.diag(cm) / cm.sum(axis=1)  # row sums: counts per true class
+assert np.all(0 < precision)
+assert np.all(0 < recall)
+f1score = 2 * precision * recall / (precision + recall)
+print('f1-scores per class:')
+print(f1score)
+print('f1-score:')
+print(np.mean(f1score))
diff --git a/src/taggingbackends/data/dataset.py b/src/taggingbackends/data/dataset.py
index 0700b0ac2795c3b941b220f1a0ff5568bbc5990c..008a7af69cf265cbe33b17701304d34b10f455ae 100644
--- a/src/taggingbackends/data/dataset.py
+++ b/src/taggingbackends/data/dataset.py
@@ -26,16 +26,29 @@ class LarvaDataset:
             self.weight_classes = isinstance(balancing_strategy, str) and (balancing_strategy.lower() == 'auto')
         else:
             self.class_weights = class_weights
+        self._path = None
 
     """
     *h5py.File*: *larva_dataset hdf5* file handler.
     """
     @property
     def full_set(self):
-        if isinstance(self._full_set, (str, pathlib.Path)):
-            self._full_set = h5py.File(str(self._full_set), "r")
+        if not isinstance(self._full_set, h5py.File):  # no open file handle yet
+            self._full_set = h5py.File(str(self.path), "r")  # NOTE(review): `path` may be None here
         return self._full_set
     """
+    *pathlib.Path*: file path.
+    """
+    @property
+    def path(self):
+        # resolved lazily from `_full_set` while it still holds a file location
+        if self._path is None and isinstance(self._full_set, (str, pathlib.Path)):
+            self.path = self._full_set
+        return self._path
+    @path.setter
+    def path(self, p):
+        self._path = pathlib.Path(p) if not isinstance(p, pathlib.Path) else p
+    """
     *list* of *bytes*: Set of distinct labels.
 
     If the hdf5 file does not feature a top-level `labels` element that lists