Skip to content
Snippets Groups Projects
Commit ef45018d authored by François  LAURENT's avatar François LAURENT
Browse files

implements #22

parent 50b0bff4
No related branches found
No related tags found
No related merge requests found
Pipeline #97336 passed
...@@ -10,7 +10,8 @@ Usage: tagging-backend [train|predict] --model-instance <name> ...@@ -10,7 +10,8 @@ Usage: tagging-backend [train|predict] --model-instance <name>
tagging-backend train ... --sample-size <N> --balancing-strategy <strategy> tagging-backend train ... --sample-size <N> --balancing-strategy <strategy>
tagging-backend train ... --frame-interval <I> --window-length <T> tagging-backend train ... --frame-interval <I> --window-length <T>
tagging-backend train ... --pretrained-model-instance <name> tagging-backend train ... --pretrained-model-instance <name>
tagging-backend predict ... --skip-make-dataset --sandbox <token> tagging-backend train ... --skip-make-dataset --skip-build-features
tagging-backend predict ... --make-dataset --build-features --sandbox <token>
`tagging-backend` typically is run using `poetry run`. `tagging-backend` typically is run using `poetry run`.
A name must be provided to identify the trained model and its location within A name must be provided to identify the trained model and its location within
...@@ -36,6 +37,10 @@ the `make_dataset` module is loaded and this may take quite some time due to ...@@ -36,6 +37,10 @@ the `make_dataset` module is loaded and this may take quite some time due to
dependencies (e.g. Julia FFI). The `--skip-make-dataset` option makes `train` dependencies (e.g. Julia FFI). The `--skip-make-dataset` option makes `train`
truly skip this step; the corresponding module is not loaded. truly skip this step; the corresponding module is not loaded.
Since version 0.8, `predict` skips the `make_dataset` and `build_features` steps
by default, as if `--skip-make-dataset` and `--skip-build-features` were given.
To run these steps with `predict`, pass `--make-dataset` and/or `--build-features`.
`--sandbox <token>` makes `tagging-backend` use a token instead of <name> as `--sandbox <token>` makes `tagging-backend` use a token instead of <name> as
directory name in data/raw, data/interim and data/processed. directory name in data/raw, data/interim and data/processed.
This is intended to prevent conflicts on running `predict` in parallel on This is intended to prevent conflicts on running `predict` in parallel on
...@@ -59,7 +64,7 @@ def main(fun=None): ...@@ -59,7 +64,7 @@ def main(fun=None):
input_files, labels = [], [] input_files, labels = [], []
sample_size = window_length = frame_interval = None sample_size = window_length = frame_interval = None
trxmat_only = reuse_h5files = False trxmat_only = reuse_h5files = False
skip_make_dataset = skip_build_features = False make_dataset = build_features = None
pretrained_model_instance = None pretrained_model_instance = None
sandbox = False sandbox = False
balancing_strategy = 'auto' balancing_strategy = 'auto'
...@@ -92,9 +97,13 @@ def main(fun=None): ...@@ -92,9 +97,13 @@ def main(fun=None):
elif sys.argv[k] == "--reuse-h5files": elif sys.argv[k] == "--reuse-h5files":
reuse_h5files = True reuse_h5files = True
elif sys.argv[k] == "--skip-make-dataset": elif sys.argv[k] == "--skip-make-dataset":
skip_make_dataset = True make_dataset = False
elif sys.argv[k] == "--skip-build-features": elif sys.argv[k] == "--skip-build-features":
skip_build_features = True build_features = False
elif sys.argv[k] == '--make-dataset':
make_dataset = True
elif sys.argv[k] == '--build-features':
build_features = True
elif sys.argv[k] == "--pretrained-model-instance": elif sys.argv[k] == "--pretrained-model-instance":
k = k + 1 k = k + 1
pretrained_model_instance = sys.argv[k] pretrained_model_instance = sys.argv[k]
...@@ -116,7 +125,11 @@ def main(fun=None): ...@@ -116,7 +125,11 @@ def main(fun=None):
if input_files: if input_files:
for file in input_files: for file in input_files:
backend.move_to_raw(file) backend.move_to_raw(file)
if not skip_make_dataset: if make_dataset is None and train_or_predict == 'train':
make_dataset = True
if build_features is None and train_or_predict == 'train':
build_features = True
if make_dataset:
make_dataset_kwargs = dict(labels_expected=train_or_predict == "train", make_dataset_kwargs = dict(labels_expected=train_or_predict == "train",
balancing_strategy=balancing_strategy) balancing_strategy=balancing_strategy)
if labels: if labels:
...@@ -134,7 +147,7 @@ def main(fun=None): ...@@ -134,7 +147,7 @@ def main(fun=None):
elif reuse_h5files: elif reuse_h5files:
logging.info("option --reuse-h5files is ignored in the absence of --trxmat-only") logging.info("option --reuse-h5files is ignored in the absence of --trxmat-only")
backend._run_script(backend.make_dataset, **make_dataset_kwargs) backend._run_script(backend.make_dataset, **make_dataset_kwargs)
if not skip_build_features: if build_features:
backend._run_script(backend.build_features) backend._run_script(backend.build_features)
if train_or_predict == "predict": if train_or_predict == "predict":
backend._run_script(backend.predict_model, trailing=unknown_args) backend._run_script(backend.predict_model, trailing=unknown_args)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment