From ef45018d242f0ce554fe390e4ab864cee9a9e863 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Laurent?= <francois.laurent@posteo.net>
Date: Wed, 8 Feb 2023 00:30:34 +0100
Subject: [PATCH] implements #22

---
 src/taggingbackends/main.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/taggingbackends/main.py b/src/taggingbackends/main.py
index a6c6d9c..31655f9 100644
--- a/src/taggingbackends/main.py
+++ b/src/taggingbackends/main.py
@@ -10,7 +10,8 @@ Usage:  tagging-backend [train|predict] --model-instance <name>
         tagging-backend train ... --sample-size <N> --balancing-strategy <strategy>
         tagging-backend train ... --frame-interval <I> --window-length <T>
         tagging-backend train ... --pretrained-model-instance <name>
-        tagging-backend predict ... --skip-make-dataset --sandbox <token>
+        tagging-backend train ... --skip-make-dataset --skip-build-features
+        tagging-backend predict ... --make-dataset --build-features --sandbox <token>
 
 `tagging-backend` typically is run using `poetry run`.
 A name must be provided to identify the trained model and its location within
@@ -36,6 +37,10 @@ the `make_dataset` module is loaded and this may take quite some time due to
 dependencies (e.g. Julia FFI). The `--skip-make-dataset` option makes `train`
 truly skip this step; the corresponding module is not loaded.
 
+Since version 0.8, `predict` makes `--skip-make-dataset` and
+`--skip-build-features` the default behavior. In return, it accepts the
+arguments `--make-dataset` and `--build-features`.
+
 `--sandbox <token>` makes `tagging-backend` use a token instead of <name> as
 directory name in data/raw, data/interim and data/processed.
 This is intended to prevent conflicts on running `predict` in parallel on
@@ -59,7 +64,7 @@ def main(fun=None):
         input_files, labels = [], []
         sample_size = window_length = frame_interval = None
         trxmat_only = reuse_h5files = False
-        skip_make_dataset = skip_build_features = False
+        make_dataset = build_features = None
         pretrained_model_instance = None
         sandbox = False
         balancing_strategy = 'auto'
@@ -92,9 +97,13 @@ def main(fun=None):
             elif sys.argv[k] == "--reuse-h5files":
                 reuse_h5files = True
             elif sys.argv[k] == "--skip-make-dataset":
-                skip_make_dataset = True
+                make_dataset = False
             elif sys.argv[k] == "--skip-build-features":
-                skip_build_features = True
+                build_features = False
+            elif sys.argv[k] == '--make-dataset':
+                make_dataset = True
+            elif sys.argv[k] == '--build-features':
+                build_features = True
             elif sys.argv[k] == "--pretrained-model-instance":
                 k = k + 1
                 pretrained_model_instance = sys.argv[k]
@@ -116,7 +125,11 @@ def main(fun=None):
         if input_files:
             for file in input_files:
                 backend.move_to_raw(file)
-        if not skip_make_dataset:
+        if make_dataset is None and train_or_predict == 'train':
+            make_dataset = True
+        if build_features is None and train_or_predict == 'train':
+            build_features = True
+        if make_dataset:
             make_dataset_kwargs = dict(labels_expected=train_or_predict == "train",
                                        balancing_strategy=balancing_strategy)
             if labels:
@@ -134,7 +147,7 @@ def main(fun=None):
             elif reuse_h5files:
                 logging.info("option --reuse-h5files is ignored in the absence of --trxmat-only")
             backend._run_script(backend.make_dataset, **make_dataset_kwargs)
-        if not skip_build_features:
+        if build_features:
             backend._run_script(backend.build_features)
         if train_or_predict == "predict":
             backend._run_script(backend.predict_model, trailing=unknown_args)
-- 
GitLab