diff --git a/Manifest.toml b/Manifest.toml
index 84c6a4d2fb0bf89d829568da8b8273fbc65f14d6..41892f5aeb9a9a23c6b2761170d5bf15296e0d3d 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -1,6 +1,6 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.9.2"
+julia_version = "1.9.3"
 manifest_format = "2.0"
 project_hash = "2c20afabe03d014276e9478d0fdccbc2cdd634c1"
 
@@ -312,11 +312,11 @@ version = "1.9.2"
 
 [[deps.PlanarLarvae]]
 deps = ["DelimitedFiles", "HDF5", "JSON3", "LinearAlgebra", "MAT", "Meshes", "OrderedCollections", "Random", "SHA", "StaticArrays", "Statistics", "StatsBase", "StructTypes"]
-git-tree-sha1 = "25dede7c9e34786f3c9a576fc2da3c3448c12d80"
+git-tree-sha1 = "6b2dc28d56bcef101672cbf2bb784bbd5d88d579"
 repo-rev = "main"
-repo-url = "https://gitlab.pasteur.fr/nyx/planarlarvae.jl"
+repo-url = "https://gitlab.pasteur.fr/nyx/PlanarLarvae.jl"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
-version = "0.14.0"
+version = "0.15.0"
 
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
diff --git a/Project.toml b/Project.toml
index 4731bc26eb3551067eaacf1e1b690ae5c67f448a..b4870abebb8cb3a3277536a52d2b1e1e43c87872 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TaggingBackends"
 uuid = "e551f703-3b82-4335-b341-d497b48d519b"
 authors = ["François Laurent", "Institut Pasteur"]
-version = "0.15.3"
+version = "0.16"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/pyproject.toml b/pyproject.toml
index 3a7f66ef70140ee3e975ff4f210efee5f54b1815..7a6d08727061a101ce40d9717c880c7dc65083be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "TaggingBackends"
-version = "0.15.3"
+version = "0.16"
 description = "Backbone for LarvaTagger.jl tagging backends"
 authors = ["François Laurent"]
 
diff --git a/scripts/confusion.py b/scripts/confusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8acf91ae47a7bd3e47d87eae226a7875d23504d
--- /dev/null
+++ b/scripts/confusion.py
@@ -0,0 +1,112 @@
+import os
+from glob import glob
+import numpy as np
+from sklearn.metrics import confusion_matrix
+from taggingbackends.data.labels import Labels
+from taggingbackends.data.dataset import LarvaDataset
+from taggingbackends.explorer import BackendExplorer
+
+"""
+Index in `labels` of the true label at a time step.
+
+`tags` is either a single label (*str*) or a collection of tags, in which case
+the first element of `labels` found among the tags is selected.
+
+Returns -1 if no label applies, so that the caller can discard the time step.
+"""
+def index(labels, tags):
+    if isinstance(tags, str):
+        if tags == 'edited':
+            # probably a (manual) tagging mistake
+            return -1
+        # a lone label is compared as a whole, not searched for substrings
+        candidates = (tags,)
+    else:
+        candidates = tags
+    for i, label in enumerate(labels):
+        if label in candidates:
+            return i
+    print("Incompatible labels")
+    print(" expected labels:")
+    print(labels)
+    print(" labels at a time step:")
+    print(tags)
+    return -1
+
+uid, gid = os.getenv('HOST_UID', None), os.getenv('HOST_GID', None)
+if uid is not None:
+    # os.chown leaves the group unchanged if gid is -1
+    uid, gid = int(uid), -1 if gid is None else int(gid)
+
+labels = None
+cm = None
+
+fn_true = 'groundtruth.label'
+fn_pred = 'predicted.label'
+
+for assay, _, files in os.walk('/data'):
+    if not (fn_true in files and fn_pred in files):
+        continue
+
+    expected = Labels(os.path.join(assay, fn_true))
+    predicted = Labels(os.path.join(assay, fn_pred))
+
+    if labels is None:
+        labels = predicted.labelspec
+    elif labels != predicted.labelspec:
+        raise ValueError(f"labels in {assay} differ from those of the previous assays")
+
+    cm_ = None
+    for larva in expected:
+        try:
+            predictions = predicted[larva]
+        except KeyError:
+            # not all the larvae in `expected` may be in `predicted`;
+            # assumes `Labels` raises KeyError like a dict -- to be confirmed
+            continue
+        y_pred = np.array([labels.index(label) for label in predictions.values()])
+        y_true = np.array([index(labels, tags) for tags in expected[larva].values()])
+        if len(y_pred) != len(y_true):
+            print(f"Skipping larva {larva} in {assay}: time steps do not match")
+            continue
+        ok = 0 <= y_true
+        cm__ = confusion_matrix(y_true[ok], y_pred[ok], labels=range(len(labels)))
+        cm_ = cm__ if cm_ is None else cm_ + cm__
+
+    if cm_ is None:
+        print(f"No larva to compare in {assay}")
+        continue
+    # accumulate once per assay; `cm_` already sums over the larvae
+    cm = cm_ if cm is None else cm + cm_
+
+    path = os.path.join(assay, 'confusion.csv')
+    with open(path, 'w') as f:
+        f.write(",".join(labels))
+        for row in cm_:
+            f.write("\n")
+            f.write(",".join([str(count) for count in row]))
+
+    if uid is not None:
+        os.chown(path, uid, gid)
+
+if cm is None:
+    raise RuntimeError(f"no assay with both {fn_true} and {fn_pred} found in /data")
+print('labels:')
+print(labels)
+print('confusion matrix:')
+print(cm)
+
+# precision and recall are null or undefined (nan) for the labels with no
+# correct prediction; the corresponding f1-scores are set to 0
+with np.errstate(divide='ignore', invalid='ignore'):
+    precision = np.diag(cm) / cm.sum(axis=0)
+    recall = np.diag(cm) / cm.sum(axis=1)
+    f1score = 2 * precision * recall / (precision + recall)
+missed = [label for label, tp in zip(labels, np.diag(cm)) if tp == 0]
+if missed:
+    print('labels with no correct prediction:')
+    print(missed)
+    f1score[np.isnan(f1score)] = 0
+print('f1-scores per class:')
+print(f1score)
+print('f1-score:')
+print(np.mean(f1score))
diff --git a/src/taggingbackends/data/dataset.py b/src/taggingbackends/data/dataset.py
index 0700b0ac2795c3b941b220f1a0ff5568bbc5990c..008a7af69cf265cbe33b17701304d34b10f455ae 100644
--- a/src/taggingbackends/data/dataset.py
+++ b/src/taggingbackends/data/dataset.py
@@ -26,16 +26,32 @@ class LarvaDataset:
             self.weight_classes = isinstance(balancing_strategy, str) and (balancing_strategy.lower() == 'auto')
         else:
             self.class_weights = class_weights
+        self._path = None
 
     """
     *h5py.File*: *larva_dataset hdf5* file handler.
     """
     @property
     def full_set(self):
-        if isinstance(self._full_set, (str, pathlib.Path)):
-            self._full_set = h5py.File(str(self._full_set), "r")
+        if not isinstance(self._full_set, h5py.File):
+            self._full_set = h5py.File(str(self.path), "r")
         return self._full_set
     """
+    *pathlib.Path*: file path.
+    """
+    @property
+    def path(self):
+        if self._path is None:
+            if isinstance(self._full_set, (str, pathlib.Path)):
+                self.path = self._full_set
+            elif isinstance(self._full_set, h5py.File):
+                # dataset built from an open file: ask h5py for the file name
+                self.path = self._full_set.filename
+        return self._path
+    @path.setter
+    def path(self, p):
+        self._path = p if isinstance(p, pathlib.Path) else pathlib.Path(p)
+    """
     *list* of *bytes*: Set of distinct labels.
 
     If the hdf5 file does not feature a top-level `labels` element that lists