Commit 60a308f2 authored by François LAURENT

building larva_dataset files

parent cab06b59
#!/bin/bash
# prerequisite in parent dir:
# git clone https://gitlab.pasteur.fr/nyx/TaggingBackends --branch dev
# cd TaggingBackends; julia --project=. -e 'using Pkg; Pkg.instantiate()'
backbone=../TaggingBackends
if ! [ -d "$backbone" ]; then
    echo "local repository not found: $backbone"
    exit 1
fi
if [ -z "$1" ]; then
    model=t5_t15_full
else
    model="$1"
fi
# choose one of the two label conventions below
convention="back,cast,hunch,roll,run,stop"
#convention="back_large,cast_large,hunch_large,roll_large,run_large,small_motion,stop_large"
echo "convention: $convention"
samplesize=100000
echo "sample size: $samplesize"
# the target total length is 100 time points, just under 3*34 = 102
winlen=34
echo "window length: $winlen (*3)"
# prerequisite in data/raw: have (parts of) t5 and/or t15 mounted or copied (symbolic links do not work)
mkdir -p "data/raw/$model"
prevloc=$(pwd)
cd "data/raw/$model"
[ -d t5 ] || echo "cannot find t5 in directory data/raw/$model"
[ -d t15 ] || echo "cannot find t15 in directory data/raw/$model"
cd "$prevloc"
# note: on first run, remove option --reuse-h5files
JULIA_PROJECT="$backbone" poetry run tagging-backend train --model-instance "$model" --labels "$convention" --trxmat-only --reuse-h5files --sample-size $samplesize --window-length $winlen
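# a first-run sketch of the same invocation (no new options; --reuse-h5files is simply
# dropped so that the intermediate h5 files are rebuilt rather than reused):
#   JULIA_PROJECT="$backbone" poetry run tagging-backend train --model-instance "$model" --labels "$convention" --trxmat-only --sample-size $samplesize --window-length $winlen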
# note: for the purpose of generating a larva_dataset file,
# the Julia part of the TaggingBackends package is enough:
#
# $ julia --project=. -e 'using TaggingBackends.Trxmat2HDF5; convert_trxmat_to_spineh5(; files="trxmat.list")'
# with the trxmat.list file generated on sftpcampus, from within the screens directory, with:
# $ find t{5,15} -mindepth 4 -name trx.mat > ~/trxmat.list
# and retrieved locally.
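# for instance (assuming scp access to sftpcampus; the exact transfer command is not
# prescribed here):
# $ scp sftpcampus:~/trxmat.list .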
#
# $ julia --project=. -e 'using TaggingBackends.LarvaDatasets, TaggingBackends.Trxmat2HDF5; write_larva_dataset_hdf5("data/processed", larvah5files("data/interim"); labels=["back","cast","hunch","roll","run","stop"], sample_size=100000)'
# TODO: relocate or remove the _larva.h5 files currently found in data/interim/$model,
# prior to calling `predict`
# TODO: suffix the name of the generated larva_dataset file with info about the chosen convention
echo "relocate the following file for future reuse:"
ls "data/interim/$model/larva_dataset_*.hdf5"
#!/bin/bash
# this short series of commands forces Poetry to re-resolve and reinstall its dependencies;
# useful e.g. with a local TaggingBackends checkout (pyproject.toml must be updated first).
rm -rf "$(poetry env info -p)"; rm -f poetry.lock; poetry install -vvv
 import glob
 import pathlib
-def make_dataset(backend, labels_expected=False, trxmat_only=False, labels=None, **kwargs):
+def make_dataset(backend, labels_expected=False, trxmat_only=False, **kwargs):
     if labels_expected:
         larva_dataset_file = glob.glob(str(backend.raw_data_dir() / "larva_dataset_*.hdf5"))
         if larva_dataset_file:
@@ -12,10 +12,6 @@ def make_dataset(backend, labels_expected=False, trxmat_only=False, labels=None, **kwargs):
             print(f"moving file to interim: {larva_dataset_file}")
             backend.move_to_interim(larva_dataset_file, copy=False)
         else:
-            if labels:
-                if isinstance(labels, str):
-                    labels = labels.split(',')
-                kwargs["labels"] = labels
             print("generating a larva_dataset file...")
             # generate a larva_dataset_*.hdf5 file in data/interim/{instance}/
             if trxmat_only:
...
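The four removed lines move the comma-splitting of the labels string out of make_dataset; labels can still be handed over through **kwargs. A minimal caller-side sketch, where the backend object and the call site are hypothetical and the splitting responsibility is an assumption:

# hypothetical call site; the caller now splits the convention string itself
labels = "back,cast,hunch,roll,run,stop"
if isinstance(labels, str):
    labels = labels.split(',')  # the parsing make_dataset no longer performs
make_dataset(backend, labels_expected=False, trxmat_only=True, labels=labels)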