From 9bf8f75f5da4dea25e4611dca3cb1ca635ae676a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Laurent?= <francois.laurent@posteo.net> Date: Sat, 1 Jul 2023 14:40:41 +0200 Subject: [PATCH] maggotuba time segment extension mechanism made optional --- Project.toml | 2 +- src/LarvaDatasets.jl | 8 ++++++-- src/taggingbackends/explorer.py | 7 ++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index cc1dc08..672f20b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TaggingBackends" uuid = "e551f703-3b82-4335-b341-d497b48d519b" authors = ["François Laurent", "Institut Pasteur"] -version = "0.14" +version = "0.14.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl index babf42d..dc37f42 100644 --- a/src/LarvaDatasets.jl +++ b/src/LarvaDatasets.jl @@ -555,6 +555,7 @@ function write_larva_dataset_hdf5(output_dir::String, includeall="edited", seed=nothing, distributed_sampling=true, + past_future_extensions=true, ) if distributed_sampling new_write_larva_dataset_hdf5(output_dir, input_data; @@ -570,8 +571,10 @@ function write_larva_dataset_hdf5(output_dir::String, fixmwt=fixmwt, frameinterval=frameinterval, includeall=includeall, + past_future_extensions=past_future_extensions, seed=seed) else + past_future_extensions || throw("not implemented") legacy_write_larva_dataset_hdf5(output_dir, input_data, window_length; labels=labels, labelpointers=labelpointers, @@ -595,6 +598,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; balance=true, frameinterval=0.1, includeall="edited", + past_future_extensions=true, seed=nothing, kwargs...) repo = if input_data isa String @@ -608,7 +612,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; end @assert !isnothing(frameinterval) window = TimeWindow(window_length * frameinterval, round(Int, 1 / frameinterval); - maggotuba_compatibility=true) + maggotuba_compatibility=past_future_extensions) selectors = isnothing(labels) ? getprimarylabels(first(Dataloaders.files(repo))) : labels min_max_ratio = balance ? 2 : 20 index = if isnothing(includeall) @@ -627,7 +631,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; total_sample_size = length(loader.index) classcounts, _ = Dataloaders.groupby(selectors, loader.index.targetcounts) # - extended_window_length = 3 * window_length + extended_window_length = past_future_extensions ? 3 * window_length : window_length date = Dates.format(Dates.now(), "yyyy_mm_dd") win = window_length # shorter name to keep next line within the allowed text width output_file = "larva_dataset_$(date)_$(win)_$(win)_$(total_sample_size).hdf5" diff --git a/src/taggingbackends/explorer.py b/src/taggingbackends/explorer.py index 286dae4..0bbccb6 100644 --- a/src/taggingbackends/explorer.py +++ b/src/taggingbackends/explorer.py @@ -485,10 +485,14 @@ run `poetry add {pkg}` from directory: \n def generate_dataset(self, input_files, labels=None, window_length=20, sample_size=None, balance=True, - include_all=None, frame_interval=None, seed=None): + include_all=None, frame_interval=None, seed=None, + past_future_extensions=None): """ Generate a *larva_dataset hdf5* file in data/interim/{instance}/ """ + if past_future_extensions is None: + past_future_extensions = True + logging.warning("Upcoming breaking change: pass argument past_future_extensions=True to BackendExplorer.generate_dataset to maintain current behavior") return TaggingBackends.LarvaDatasets.write_larva_dataset_hdf5( str(self.interim_data_dir()), input_files if isinstance(input_files, list) else str(input_files), @@ -498,6 +502,7 @@ run `poetry add {pkg}` from directory: \n balance=balance, includeall=include_all, frameinterval=frame_interval, + past_future_extensions=past_future_extensions, seed=seed) def compile_trxmat_database(self, input_dir, -- GitLab