Skip to content
Snippets Groups Projects
Commit f9e9aff9 authored by François  LAURENT's avatar François LAURENT
Browse files

interpolation support at commandline and predict_model levels

parent 6d25d0c1
No related branches found
No related tags found
No related merge requests found
Pipeline #89662 passed
...@@ -250,6 +250,7 @@ end ...@@ -250,6 +250,7 @@ end
function write_larva_dataset_hdf5(path, counts, files, refs, nsteps_before, nsteps_after; function write_larva_dataset_hdf5(path, counts, files, refs, nsteps_before, nsteps_after;
fixmwt=false, frameinterval=nothing, fixmwt=false, frameinterval=nothing,
) )
fixmwt && @warn "`fixmwt=true` is no longer supported"
# this method mutates argument `refs` # this method mutates argument `refs`
refs′= Tuple{Int, Int, Int, eltype(keys(counts))}[] refs′= Tuple{Int, Int, Int, eltype(keys(counts))}[]
for (label, count) in pairs(counts) for (label, count) in pairs(counts)
...@@ -278,6 +279,9 @@ function write_larva_dataset_hdf5(path, counts, files, refs, nsteps_before, nste ...@@ -278,6 +279,9 @@ function write_larva_dataset_hdf5(path, counts, files, refs, nsteps_before, nste
# extension # extension
h5["labels"] = collect(keys(counts)) h5["labels"] = collect(keys(counts))
#h5["files"] = [f.source for f in files] #h5["files"] = [f.source for f in files]
if !isnothing(frameinterval)
attributes(h5["samples"])["frame_interval"] = frameinterval
end
end end
end end
......
...@@ -415,7 +415,8 @@ run `poetry add {pkg}` from directory: \n ...@@ -415,7 +415,8 @@ run `poetry add {pkg}` from directory: \n
return input_files, labels return input_files, labels
def generate_dataset(self, input_files, def generate_dataset(self, input_files,
labels=None, window_length=20, sample_size=None): labels=None, window_length=20, sample_size=None,
frame_interval=None):
""" """
Generate a *larva_dataset hdf5* file in data/interim/{instance}/ Generate a *larva_dataset hdf5* file in data/interim/{instance}/
""" """
...@@ -424,7 +425,8 @@ run `poetry add {pkg}` from directory: \n ...@@ -424,7 +425,8 @@ run `poetry add {pkg}` from directory: \n
input_files if isinstance(input_files, list) else str(input_files), input_files if isinstance(input_files, list) else str(input_files),
window_length, window_length,
labels=labels, labels=labels,
sample_size=sample_size) sample_size=sample_size,
frameinterval=frame_interval)
def compile_trxmat_database(self, input_dir, def compile_trxmat_database(self, input_dir,
labels=None, window_length=20, sample_size=None, reuse_h5files=False): labels=None, window_length=20, sample_size=None, reuse_h5files=False):
......
...@@ -19,3 +19,46 @@ def get_5point_spines(spine): ...@@ -19,3 +19,46 @@ def get_5point_spines(spine):
return spine return spine
else: else:
raise NotImplementedError(spine.shape) raise NotImplementedError(spine.shape)
def interpolate(times, spines, anchor, window_length,
        spine_interpolation='linear', frame_interval=0.1, **kwargs):
    """
    Resample a spine series on a regular time grid centered on an anchor.

    The grid is made of `window_length` time points evenly spaced by
    `frame_interval`, with `window_length // 2` points before the anchor time
    `times[anchor]` and `window_length - 1 - window_length // 2` points
    after. Grid times are rounded to 4 decimal places. Spines at grid times
    that do not coincide with a recorded time are linearly interpolated
    between the two neighboring records.

    Arguments:

        times: sequence of timestamps, one per row of `spines`; the first and
            last elements are taken as the bounds of the time support.
        spines: sequence or array of spine records, indexed like `times`.
        anchor: index in `times` of the time point the window is centered on.
        window_length: number of time points in the returned series.
        spine_interpolation: interpolation method; only 'linear' is supported.
        frame_interval: time step between successive grid points.
        **kwargs: ignored.

    Returns the stacked series (first axis of size `window_length`), or
    `None` if the time support around the anchor is too short to hold a
    full window.

    Raises `NotImplementedError` if `spine_interpolation` is not 'linear',
    and `ValueError` if `frame_interval` is not positive.
    """
    # explicit exceptions instead of `assert`, which is stripped with `-O`
    if spine_interpolation != 'linear':
        raise NotImplementedError(spine_interpolation)
    if frame_interval <= 0:
        raise ValueError(f"frame_interval must be positive; got {frame_interval}")

    times = np.asarray(times)
    tstart, tanchor, tstop = times[0], times[anchor], times[-1]

    nframes_before = window_length // 2
    nframes_after = window_length - 1 - nframes_before
    # grid indices relative to the anchor, restricted to the time support
    istart = max(-nframes_before,
            int(np.trunc((tstart - tanchor) / frame_interval)))
    istop = min(nframes_after,
            int(np.trunc((tstop - tanchor) / frame_interval)))
    if istop - istart + 1 < window_length:
        # not enough data around the anchor
        return None

    series = []
    for i in range(istart, istop + 1):
        t = round((tanchor + i * frame_interval) * 1e4) * 1e-4
        # rounding may push t slightly out of the time support, in which
        # case no bracketing records exist; clamp t to the nearest bound
        t = min(max(t, tstart), tstop)
        inext = np.flatnonzero(t <= times)[0]
        tnext, xnext = times[inext], spines[inext]
        if tnext == t:
            x = xnext
        else:
            # t is clamped to tstart, so a strict inequality implies 0 < inext
            tprev, xprev = times[inext - 1], spines[inext - 1]
            alpha = (t - tprev) / (tnext - tprev)
            x = (1 - alpha) * xprev + alpha * xnext
        series.append(x)
    return np.stack(series, axis=0)
def interp(x0, x1, alpha):
    """
    Linear interpolation between `x0` and `x1`.

    `alpha` is the weight given to `x1`; `x0` is weighted by `1 - alpha`,
    so that `alpha=0` yields `x0` and `alpha=1` yields `x1`.
    """
    weight0 = 1 - alpha
    weight1 = alpha
    return weight0 * x0 + weight1 * x1
...@@ -8,7 +8,7 @@ def help(_print=False): ...@@ -8,7 +8,7 @@ def help(_print=False):
Usage: tagging-backend [train|predict] --model-instance <name> Usage: tagging-backend [train|predict] --model-instance <name>
tagging-backend train ... --labels <comma-separated-list> tagging-backend train ... --labels <comma-separated-list>
tagging-backend train ... --sample-size <N> --window-length <T> tagging-backend train ... --sample-size <N> --window-length <T>
tagging-backend train ... --trxmat-only --reuse-h5files tagging-backend train ... --frame-interval <I>
tagging-backend train ... --pretrained-model-instance <name> tagging-backend train ... --pretrained-model-instance <name>
tagging-backend predict ... --skip-make-dataset tagging-backend predict ... --skip-make-dataset
...@@ -21,6 +21,10 @@ spines. If option `--sample-size` is passed, <N> time segments are sampled from ...@@ -21,6 +21,10 @@ spines. If option `--sample-size` is passed, <N> time segments are sampled from
the raw database. The total length 3*<T> of time segments is 60 per default the raw database. The total length 3*<T> of time segments is 60 per default
(20 *past* points, 20 *present* points and 20 *future* points). (20 *past* points, 20 *present* points and 20 *future* points).
If frame interval <I> is specified (in seconds), spine series are resampled and
interpolated around each time segment anchor (center).
**Deprecated**:
Option `--trxmat-only` is suitable for large databases made of trx.mat files Option `--trxmat-only` is suitable for large databases made of trx.mat files
only. Intermediate HDF5 files are generated prior to counting the various only. Intermediate HDF5 files are generated prior to counting the various
behavior labels and sampling time segments in the database. These intermediate behavior labels and sampling time segments in the database. These intermediate
...@@ -48,7 +52,7 @@ def main(fun=None): ...@@ -48,7 +52,7 @@ def main(fun=None):
train_or_predict = sys.argv[1] train_or_predict = sys.argv[1]
project_dir = model_instance = None project_dir = model_instance = None
input_files, labels = [], [] input_files, labels = [], []
sample_size = window_length = None sample_size = window_length = frame_interval = None
trxmat_only = reuse_h5files = False trxmat_only = reuse_h5files = False
skip_make_dataset = skip_build_features = False skip_make_dataset = skip_build_features = False
pretrained_model_instance = None pretrained_model_instance = None
...@@ -72,6 +76,9 @@ def main(fun=None): ...@@ -72,6 +76,9 @@ def main(fun=None):
elif sys.argv[k] == "--window-length": elif sys.argv[k] == "--window-length":
k = k + 1 k = k + 1
window_length = sys.argv[k] window_length = sys.argv[k]
elif sys.argv[k] == "--frame-interval":
k = k + 1
frame_interval = sys.argv[k]
elif sys.argv[k] == "--trxmat-only": elif sys.argv[k] == "--trxmat-only":
trxmat_only = True trxmat_only = True
elif sys.argv[k] == "--reuse-h5files": elif sys.argv[k] == "--reuse-h5files":
...@@ -98,6 +105,8 @@ def main(fun=None): ...@@ -98,6 +105,8 @@ def main(fun=None):
make_dataset_kwargs["sample_size"] = sample_size make_dataset_kwargs["sample_size"] = sample_size
if window_length: if window_length:
make_dataset_kwargs["window_length"] = window_length make_dataset_kwargs["window_length"] = window_length
if frame_interval:
make_dataset_kwargs["frame_interval"] = frame_interval
if trxmat_only: if trxmat_only:
make_dataset_kwargs["trxmat_only"] = True make_dataset_kwargs["trxmat_only"] = True
if reuse_h5files: if reuse_h5files:
...@@ -125,6 +134,9 @@ def main(fun=None): ...@@ -125,6 +134,9 @@ def main(fun=None):
elif key in ("sample_size", "window_length"): elif key in ("sample_size", "window_length"):
if isinstance(val, str): if isinstance(val, str):
val = int(val) val = int(val)
elif key in ("frame_interval",):
if isinstance(val, str):
val = float(val)
elif key == "labels": elif key == "labels":
if isinstance(val, str): if isinstance(val, str):
val = val.split(',') val = val.split(',')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment