Skip to content
Snippets Groups Projects
Select Git revision
  • ee5e09b0cba5020a8199d5023b693a3cf4fa0817
  • main default protected
  • torch2
  • torch1
  • dev protected
  • 20230311_new_default
  • 20230311
  • design protected
  • 20230129
  • 20230111
  • 20221005 protected
  • 20220418 protected
  • v0.20
  • v0.19
  • v0.18
  • v0.17
  • v0.16.4
  • v0.16.3
  • v0.16.2
  • v0.16.1
  • v0.16
  • v0.15
  • v0.14
  • v0.13
  • v0.12.4
  • v0.12.3
  • v0.12.2
  • v0.12.1
  • v0.12
  • v0.11
  • v0.10
  • v0.9.1
32 results

make_dataset.py

Blame
  • make_dataset.py 1.90 KiB
    import glob
    import pathlib
    
    def make_dataset(backend, labels_expected=False, trxmat_only=False,
                     balancing_strategy='maggotuba',
                     pretrained_model_instance='default', **kwargs):
        """Make a larva_dataset file available in data/interim/{instance}/.

        Nothing is done unless `labels_expected` is true. Otherwise:

        * if at least one ``larva_dataset_*.hdf5`` file is found in the raw
          data directory, the first match is passed to
          ``backend.move_to_interim`` (with ``copy=False``);
        * else a new file is generated with ``backend.generate_dataset``.

        Parameters
        ----------
        backend:
            Object exposing ``raw_data_dir()``, ``project_dir``,
            ``move_to_interim()`` and ``generate_dataset()``.
        labels_expected:
            When false, the function returns immediately.
        trxmat_only:
            Accepted for interface compatibility but currently ignored: the
            ``backend.compile_trxmat_database`` code path is disabled.
        balancing_strategy:
            ``balance=True`` is passed to ``generate_dataset`` only if this is
            a string equal to ``'maggotuba'`` (case-insensitive).
        pretrained_model_instance:
            Name of the subdirectory of ``<project_dir>/pretrained_models``
            searched for a ``*config.json`` file, from which a default
            ``frame_interval`` is read when the caller did not pass one.
        **kwargs:
            Forwarded to ``backend.generate_dataset``.

        Raises
        ------
        FileNotFoundError
            If ``frame_interval`` has to be looked up but no ``*config.json``
            file exists for `pretrained_model_instance`.
        """
        # Imported locally so the block does not depend on the module header,
        # which does not import json.
        import json

        if not labels_expected:
            return

        larva_dataset_files = glob.glob(
            str(backend.raw_data_dir() / "larva_dataset_*.hdf5"))

        if larva_dataset_files:
            if len(larva_dataset_files) > 1:
                print("multiple larva_dataset files found")
            # only the first match is used
            larva_dataset_file = pathlib.Path(larva_dataset_files[0])
            # make the file available in data/interim/{instance}/
            print(f"moving file to interim: {larva_dataset_file}")
            backend.move_to_interim(larva_dataset_file, copy=False)
            return

        if 'frame_interval' not in kwargs:
            # default to the frame interval recorded with the pretrained model
            config_pattern = str(backend.project_dir / "pretrained_models"
                                 / pretrained_model_instance / "*config.json")
            config_files = glob.glob(config_pattern)
            if not config_files:
                raise FileNotFoundError(
                    f"no config file found matching: {config_pattern}")
            with open(config_files[0], "r") as f:
                config = json.load(f)
            # the key is optional; without it, generate_dataset falls back to
            # its own default
            if 'frame_interval' in config:
                kwargs['frame_interval'] = config['frame_interval']

        print("generating a larva_dataset file...")
        # generate a larva_dataset_*.hdf5 file in data/interim/{instance}/
        # NOTE: the trxmat_only alternative (backend.compile_trxmat_database)
        # is disabled, so generate_dataset is always used.
        balance = (isinstance(balancing_strategy, str)
                   and balancing_strategy.lower() == 'maggotuba')
        out = backend.generate_dataset(backend.raw_data_dir(),
                                       balance=balance,
                                       **kwargs)
        print(f"larva_dataset file generated: {out}")
    
    
    from taggingbackends.main import main
    
    # Script entry point: hand make_dataset over to taggingbackends' main().
    # NOTE(review): presumably main() parses the command line and supplies the
    # backend argument -- confirm against taggingbackends.main.
    if __name__ == "__main__":
        main(make_dataset)