diff --git a/Project.toml b/Project.toml index a9a1942dc3ed35be76e2d25c9ce017a876c2be4f..4f052cf5204ae8709c638e5f1e5b4146a325a99f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TaggingBackends" uuid = "e551f703-3b82-4335-b341-d497b48d519b" authors = ["François Laurent", "Institut Pasteur"] -version = "0.11.1" +version = "0.12" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/pyproject.toml b/pyproject.toml index 734c7134be041b1a4874b5f72677545a33af109a..bcd821c30c78d91f3a70d0b9b279d92bef2c9645 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "TaggingBackends" -version = "0.11.1" +version = "0.12.0" description = "Backbone for LarvaTagger.jl tagging backends" authors = ["François Laurent"] diff --git a/src/taggingbackends/data/labels.py b/src/taggingbackends/data/labels.py index e2b93ad59c98dc9e2e86584be438592b5bcf8bb7..a962b57bf18fae3ece4da2ab3ea82197daca793f 100644 --- a/src/taggingbackends/data/labels.py +++ b/src/taggingbackends/data/labels.py @@ -66,7 +66,9 @@ because label colors are also stored in the `labelspec` attribute. With taggingbackends==0.9, a related attribute was introduced: `secondarylabelspec`. To get a unique array of indexable labels, both primary -and secondary labels in a same array, use `full_label_list` instead. +and secondary labels in the same array, use either `encoding_label_list` or +`decoding_label_list` instead. The two properties are typically equal, unless +attribute `decodingspec` is defined.
""" class Labels: @@ -84,6 +86,7 @@ class Labels: self.secondarylabelspec = None self._tracking = tracking self._input_labels = None + self.decodingspec = None # self.filepath = None # unused attribute; may help to manage data # dependencies in the future @@ -177,8 +180,10 @@ class Labels: elif len(key) == 3: run, larva, timestamp = key self.set_timestep(run, larva, timestamp, value) + elif len(key) < 1: + raise IndexError("too few dimensions") else: - raise IndexError("too few of many dimensions") + raise IndexError("too many dimensions") else: run = key self.set_run(run, value) @@ -264,6 +269,7 @@ class Labels: self.labels, self.metadata = new_self.labels, new_self.metadata self.labelspec, self.units = new_self.labelspec, new_self.units self.secondarylabelspec = new_self.secondarylabelspec + # no `decodingspec` in label files self.tracking = new_self.tracking return self @@ -306,6 +312,7 @@ class Labels: self.units = data.get("units", {}) self.labelspec = data.get("labels", {}) self.secondarylabelspec = data.get("secondarylabels", []) + self.decodingspec = None # ensure it is not set self._tracking = data.get("dependencies", []) if isinstance(self._tracking, dict): self._tracking = [self._tracking] @@ -322,7 +329,7 @@ class Labels: List of str: all different labels including primary and secondary labels. """ @property - def full_label_list(self): + def encoding_label_list(self): if isinstance(self.labelspec, dict): labelset = self.labelspec['names'] else: @@ -331,6 +338,18 @@ class Labels: labelset = labelset + self.secondarylabelspec return labelset + """ + List of str: all different labels including primary and secondary labels. + """ + @property + def decoding_label_list(self): + labelspec = self.decodingspec + if labelspec is None: + return self.encoding_label_list + if self.secondarylabelspec: + labelset = labelset + self.secondarylabelspec + return labelset + """ Encode the text labels as indices (`int` or `list` of `int`). 
@@ -345,7 +364,7 @@ class Labels: elif isinstance(label, dict): encoded = {t: self.encode(l) for t, l in label.items()} else: - labelset = self.full_label_list + labelset = self.encoding_label_list if isinstance(label, str): encoded = labelset.index(label) + 1 elif isinstance(label, int): @@ -358,7 +377,25 @@ class Labels: """ Decode the label indices as text (`str` or `list` of `str`). - Text labels are picked in `labelspec`. + Text labels are picked from `decodingspec` if defined, or `labelspec` otherwise. + + `decodingspec` is set to decode the encoded output from a tagger that + defines redundant labels, which are remapped onto labels in `labelspec`. + Such taggers may define several sections in their config file, including + `original_behavior_labels`, `behavior_labels`, `remapped_behavior_labels`. + + Note that `decodingspec` should not be set to decode a label file. A label + file should use and mention the remapped labels only, if the latter are + defined by the tagger used to generate the label file. In the case of + decoding a label file, only `labelspec` should be defined in the `Labels` + object. + + In practice, call `load_model_config` first if + processing the output of a tagger, or do not call that method otherwise. + + MaggotUBA's `predict_model` does not call `decode` but directly indexes into + `behavior_labels`. As a consequence, the above subtleties do not apply to + MaggotUBA.
""" def decode(self, label=None): if label is None: @@ -368,7 +405,7 @@ class Labels: elif isinstance(label, dict): decoded = {t: self.decode(l) for t, l in label.items()} else: - labelset = self.full_label_list + labelset = self.decoding_label_list if isinstance(label, int): decoded = labelset[label-1] elif isinstance(label, str): @@ -378,6 +415,14 @@ class Labels: decoded = [labelset[l-1] for l in label] return decoded + def load_model_config(self, config): + try: + self.labelspec = config["remapped_behavior_labels"] + except KeyError: + self.labelspec = config["behavior_labels"] + else: + self.decodingspec = config["behavior_labels"] + class LabelEncoder(json.JSONEncoder): def default(self, labels): if isinstance(labels, Labels): diff --git a/src/taggingbackends/main.py b/src/taggingbackends/main.py index dde5335e0a03a7620aa8a334c59bc9fb800bc636..f530b149389f3eaba15cca571a46603acb349eed 100644 --- a/src/taggingbackends/main.py +++ b/src/taggingbackends/main.py @@ -14,6 +14,7 @@ Usage: tagging-backend [train|predict] --model-instance <name> tagging-backend train ... --skip-make-dataset --skip-build-features tagging-backend predict ... --make-dataset --build-features tagging-backend predict ... --sandbox <token> + tagging-backend --help `tagging-backend` typically is run using `poetry run`. A name must be provided to identify the trained model and its location within @@ -83,9 +84,9 @@ def main(fun=None): format="%(levelname)s:%(name)s: %(message)s") if fun is None: # called by scripts/tagging-backend - if not sys.argv[1:]: + if not sys.argv[1:] or any(arg == '--help' for arg in sys.argv): help(True) - sys.exit("too few input arguments; subcommand expected: 'train' or 'predict'") + #sys.exit("too few input arguments; subcommand expected: 'train' or 'predict'") return train_or_predict = sys.argv[1] project_dir = model_instance = None