diff --git a/Project.toml b/Project.toml index cc1dc08bbf812dfec729a13be24119c7069939a2..672f20ba4e212055c69b5cde18b7007ba2b39aba 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TaggingBackends" uuid = "e551f703-3b82-4335-b341-d497b48d519b" authors = ["François Laurent", "Institut Pasteur"] -version = "0.14" +version = "0.14.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl index babf42dfc47e7372f46addd22287ae8566120ebb..dc37f4241240585a895e4e880eb77eb8bdb04f83 100644 --- a/src/LarvaDatasets.jl +++ b/src/LarvaDatasets.jl @@ -555,6 +555,7 @@ function write_larva_dataset_hdf5(output_dir::String, includeall="edited", seed=nothing, distributed_sampling=true, + past_future_extensions=true, ) if distributed_sampling new_write_larva_dataset_hdf5(output_dir, input_data; @@ -570,8 +571,10 @@ function write_larva_dataset_hdf5(output_dir::String, fixmwt=fixmwt, frameinterval=frameinterval, includeall=includeall, + past_future_extensions=past_future_extensions, seed=seed) else + past_future_extensions || throw("not implemented") legacy_write_larva_dataset_hdf5(output_dir, input_data, window_length; labels=labels, labelpointers=labelpointers, @@ -595,6 +598,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; balance=true, frameinterval=0.1, includeall="edited", + past_future_extensions=true, seed=nothing, kwargs...) repo = if input_data isa String @@ -608,7 +612,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; end @assert !isnothing(frameinterval) window = TimeWindow(window_length * frameinterval, round(Int, 1 / frameinterval); - maggotuba_compatibility=true) + maggotuba_compatibility=past_future_extensions) selectors = isnothing(labels) ? getprimarylabels(first(Dataloaders.files(repo))) : labels min_max_ratio = balance ? 2 : 20 index = if isnothing(includeall) @@ -627,7 +631,7 @@ function new_write_larva_dataset_hdf5(output_dir, input_data; total_sample_size = length(loader.index) classcounts, _ = Dataloaders.groupby(selectors, loader.index.targetcounts) # - extended_window_length = 3 * window_length + extended_window_length = past_future_extensions ? 3 * window_length : window_length date = Dates.format(Dates.now(), "yyyy_mm_dd") win = window_length # shorter name to keep next line within the allowed text width output_file = "larva_dataset_$(date)_$(win)_$(win)_$(total_sample_size).hdf5" diff --git a/src/taggingbackends/explorer.py b/src/taggingbackends/explorer.py index 286dae451ff1189f260eb44670bf7b3684c79807..0bbccb648c16643fdd22477f19a91904abda3040 100644 --- a/src/taggingbackends/explorer.py +++ b/src/taggingbackends/explorer.py @@ -485,10 +485,14 @@ run `poetry add {pkg}` from directory: \n def generate_dataset(self, input_files, labels=None, window_length=20, sample_size=None, balance=True, - include_all=None, frame_interval=None, seed=None): + include_all=None, frame_interval=None, seed=None, + past_future_extensions=None): """ Generate a *larva_dataset hdf5* file in data/interim/{instance}/ """ + if past_future_extensions is None: + past_future_extensions = True + logging.warning("Upcoming breaking change: pass argument past_future_extensions=True to BackendExplorer.generate_dataset to maintain current behavior") return TaggingBackends.LarvaDatasets.write_larva_dataset_hdf5( str(self.interim_data_dir()), input_files if isinstance(input_files, list) else str(input_files), @@ -498,6 +502,7 @@ run `poetry add {pkg}` from directory: \n balance=balance, includeall=include_all, frameinterval=frame_interval, + past_future_extensions=past_future_extensions, seed=seed) def compile_trxmat_database(self, input_dir, diff --git a/src/taggingbackends/main.py b/src/taggingbackends/main.py index 166bc9c9d3012cae7ed355cff8c09e20b47e0a60..a5b7b8e98d237e375fff0e7990709e49573a4080 100644 --- a/src/taggingbackends/main.py +++ b/src/taggingbackends/main.py @@ -111,6 +111,7 @@ def main(fun=None): trxmat_only = reuse_h5files = False make_dataset = build_features = None pretrained_model_instance = None + original_model_instance = None sandbox = False balancing_strategy = 'auto' include_all = None