diff --git a/Project.toml b/Project.toml index 5dbf3b2616df7ea4b67921f501acf9722250d0a6..4fefa105e15dca5838e8e13597c110207b17085b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TaggingBackends" uuid = "e551f703-3b82-4335-b341-d497b48d519b" authors = ["François Laurent", "Institut Pasteur"] -version = "0.7.1" +version = "0.7.2" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/pyproject.toml b/pyproject.toml index 5cab35d6fc8a3a3d121ac999bb46f879935af816..84e30fe3132801c1d9a3d26c05b4f9a0f8e9f24e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "TaggingBackends" -version = "0.7.1" +version = "0.7.2" description = "Backbone for LarvaTagger.jl tagging backends" authors = ["François Laurent"] diff --git a/src/taggingbackends/data/dataset.py b/src/taggingbackends/data/dataset.py index 80b02804c80359c9aa7bca99d2b3267beb2865ca..9cc670b5102e27d911806c50ff1a26132ef6dcb9 100644 --- a/src/taggingbackends/data/dataset.py +++ b/src/taggingbackends/data/dataset.py @@ -101,12 +101,13 @@ class LarvaDataset: train, val, test = torch.utils.data.random_split(TorchDataset(), [ntrain, nval, ntest], generator=self.generator) - self._training_set = iter(itertools.cycle( - torch.utils.data.DataLoader(train, - batch_size=self.batch_size, - shuffle=True, - generator=g_train, - drop_last=True))) + if 0 < ntrain: + self._training_set = iter(itertools.cycle( + torch.utils.data.DataLoader(train, + batch_size=self.batch_size, + shuffle=True, + generator=g_train, + drop_last=True))) self._validation_set = iter( torch.utils.data.DataLoader(val, batch_size=self.batch_size)) @@ -186,14 +187,14 @@ def subset_size(ntot, train_share, val_share, test_share): while ndelta < 0: if 0 < val_share: nval += 1 - ndelta -= 1 + ndelta += 1 if ndelta < 0: if 0 < test_share: ntest += 1 - ndelta -= 1 + ndelta += 1 if ndelta < 0: ntrain += 1 - ndelta -= 1 + ndelta += 1 while 0 < ndelta: if 0 < train_share: ntrain -= 1 diff --git a/src/taggingbackends/data/labels.py b/src/taggingbackends/data/labels.py index 6c17d77bc1eabf8945a7103c93397995bb5b377f..d741a89ecbecd4960365a2532469729c828d73e0 100644 --- a/src/taggingbackends/data/labels.py +++ b/src/taggingbackends/data/labels.py @@ -8,6 +8,54 @@ TIME_PRECISION = 0.001 labels_file_extension = (".nyxlabel", ".label", ".labels") +""" +Python interface to JSON *.label* files. + +It is a poor parent of Julia struct `Dataset` from `PlanarLarvae.jl`. + +The following example function opens such a *.label* file and loads the +associated track data assumed to be available as a *trx.mat* file. +It iterates over the tracks defined in the *.label* file. The same tracks are +expected to be found also in the *trx.mat* file. + +``` +import numpy as np +from sklearn.metrics import confusion_matrix + +def retagged_trxmat_confusion_matrix(label_file): + # load the .label and associated trx.mat files + predicted_labels = Labels(label_file) + labels = predicted_labels.labelspec + track_data_file = TrxMat(label_file.parent / 'trx.mat') + expected_labels = track_data_file.read(labels) + + # iterate the tracks + cm = None + for run, larva in predicted_labels: + + ypred = np.array([labels.index(label) + for label in predicted_labels[(run, larva)].values()]) + yexp = np.stack([expected_labels[label][run][larva] + for label in labels]) + iexp, yexp = np.nonzero(yexp.T==1) # the order of dimensions matters + ypred = ypred[iexp] # if the trx.mat file mentions more tags than the + # .label file, some timesteps may appear as untagged + + cm_ = confusion_matrix(yexp, ypred, labels=np.arange(len(labels))) + cm = cm_ if cm is None else cm + cm_ + return cm +``` + +Note that unlike the `TrxMat` class, keys in `Labels` objects are +(runid, trackid) pairs. +A `Labels` object will typically describe a single run, but can technically +represent several runs. + +The `labelspec` attribute is assumed to be a list, which is valid for *.label* +generated by automatic tagging. +Manual tagging will store label names as `Labels.labelspec['names']`, because +label colors are also stored in the `labelspec` attribute. +""" class Labels: def __init__(self, labels=None, labelspec=None, metadata=None, units=None, @@ -23,8 +71,12 @@ class Labels: self.labelspec, self.units = labelspec, units self._tracking = tracking self._input_labels = None + # + self.filepath = None # unused attribute; may help to manage data + # dependencies in the future if isinstance(labels, (str, pathlib.Path)): - self.load(labels) + self.filepath = pathlib.Path(labels) if isinstance(labels, str) else labels + self.load() @property def tracking(self): @@ -182,10 +234,12 @@ class Labels: else: json.dump(self, file, cls=LabelEncoder, indent=indent, **kwargs) - def load(self, file, format="json"): + def load(self, file=None, format="json"): if format != "json": raise ArgumentError(f"unsupported format: {format}") - if isinstance(file, str): + if file is None: + file = self.filepath + elif isinstance(file, str): file = pathlib.Path(file) if isinstance(file, pathlib.Path): if file.is_dir(): diff --git a/src/taggingbackends/explorer.py b/src/taggingbackends/explorer.py index 4680ab233b0f40d822098cc615c887f2c90de4fc..c8d123adee3324458ca7e256225783ab820b6067 100644 --- a/src/taggingbackends/explorer.py +++ b/src/taggingbackends/explorer.py @@ -181,6 +181,11 @@ Cannot find any Python package in project root directory: # Julia logging elif line and line[0] in "[┌│└": print(line) + # Torch logs + elif line == ' return torch._C._cuda_getDeviceCount() > 0': + # typically follows: + # UserWarning: CUDA initialization: CUDA unknown error + print(line) # tensorflow logs elif 26 < len(line): # assume line[:26] to be e.g.: 2022-05-17 18:48:15.120981