Skip to content
Snippets Groups Projects
Commit e1168ae1 authored by François  LAURENT's avatar François LAURENT
Browse files

expand!, mergelabels! and setdefaultlabel!...

expand!, mergelabels! and setdefaultlabel! (larvatagger.jl#91)
parent 56775182
No related branches found
No related tags found
No related merge requests found
Pipeline #96484 passed
......@@ -119,6 +119,19 @@ PlanarLarvae.Datasets.encodelabels
```@docs
PlanarLarvae.Datasets.decodelabels
```
```@docs
PlanarLarvae.Datasets.mergelabels!
```
```@docs
PlanarLarvae.Datasets.setdefaultlabel!
```
```@docs
PlanarLarvae.Datasets.expand!
```
```@docs
PlanarLarvae.Formats.setdefaultlabel!
```
# Data collections
......
......@@ -8,8 +8,8 @@ import JSON3 as JSON
using SHA: sha1
export Dataset, Run, Track, extract_metadata_from_filepath, sort_metadata,
encodelabels, encodelabels!, decodelabels, decodelabels!#,
#pushdependency!, getdependencies
encodelabels, encodelabels!, decodelabels, decodelabels!, mergelabels!#,
#pushdependency!, getdependencies, setdefaultlabel!
const Attributes = AbstractDict{Symbol, Any}
const ConcreteAttributes = Dict{Symbol, Any} # ConcreteAttributes() as default argument value
......@@ -854,6 +854,109 @@ function labels_encoded(run::Run, attrname::Symbol=:labels)
return true
end
"""
expand!(run, run′, defaultstate)
Expand a run so that it features the same tracks and time steps as a second run, using a
default state for the missing time steps.
Beware labels are not processed in a specific way. In particular, the `:labels` attribute is
not updated to reflect the possibly newly introduced label from the default state.
"""
function expand!(run::Run, run′::Run, defaultstate)
newtracks = OrderedDict{TrackID, Track}()
for (trackid, track′) in pairs(run′)
timestamps′= track′.timestamps
if trackid keys(run)
track = run[trackid]
timestamps = track.timestamps
if timestamps == timestamps′
newtracks[trackid] = track
else
expanded_timestamps = collect(sort(union(timestamps, timestamps′)))
newtracks[trackid] = Track(trackid, expanded_timestamps,
Dict(feature=>[(t timestamps ? track[feature, t] : value)
for t in expanded_timestamps]
for (feature, value) in pairs(defaultstate)))
end
else
newtracks[trackid] = Track(trackid, timestamps′,
Dict(feature=>fill(value, size(timestamps′))
for (feature, value) in defaultstate))
end
end
empty!(run.tracks)
for (trackid, track) in pairs(newtracks)
run.tracks[trackid] = track
end
return run
end
"""
mergelabels!(run, run′)
Overwrite labels/tags in `run` with those defined in `run′`.
Empty labels are considered undefined.
"""
function mergelabels!(run::Run, run′::Run; attrname=(:labels, :names))
labels′, attrname′ = getlabels(run′; attrname=attrname)
if haskey(run.attributes, attrname′)
labels, _ = getlabels(run; attrname=attrname)
labelspec = run.attributes[attrname′]
for label in labels′
if label labels
if labelspec isa AbstractDict
# data loss; we cannot infer the other specs
labelspec = run.attributes[attrname′] = labels
end
push!(labelspec, label)
end
end
end
decodelabels!(run)
for (trackid, track′) in pairs(run′)
trackid in keys(run) || throw("`mergelabels!` expects to find all the tracks of the second run in the first one")
track = run[trackid]
labels = track[attrname′]
labels′= track′[attrname′]
length(labels) == length(labels′) || throw("`mergelabels!` expects the corresponding tracks to be defined on the same time support")
for (i, label) in enumerate(labels′)
isempty(label) || (labels[i] = label)
end
end
return run
end
"""
setdefaultlabel!(run, fullrun, defaultlabel)
Expand over the tracks and time steps defined in another run (see [`Datasets.expand!`](@ref))
and assign a default label/tag to the unlabelled/untagged data.
"""
function setdefaultlabel!(run::Run, fullrun::Run, defaultlabel; attrname=(:labels, :names))
decodelabels!(run)
definedlabels, ftrname = getlabels(run; attrname=attrname)
if fullrun !== run
Datasets.expand!(run, fullrun, Dict(ftrname=>defaultlabel))
if defaultlabel definedlabels
if run.attributes[ftrname] isa AbstractDict
# data loss; cannot infer additional specs
run.attributes[ftrname] = push!(definedlabels, defaultlabel)
else
push!(definedlabels, defaultlabel)
end
end
end
for track in values(run.tracks)
labels = track[ftrname]
labels[isempty.(labels)] .= defaultlabel
end
return run
end
# data dependencies
# handle JSON-deserialized data (convert keys and convert to array)
......
......@@ -18,7 +18,7 @@ using HDF5
export guessfileformat, preload, load, drop_record!, gettimeseries, astimeseries,
getrun, asrun, getmetadata, getlabels, getdependencies, appendtags, TIME_PRECISION,
labelledfiles, unload!
labelledfiles, unload!, setdefaultlabel!
const TIME_PRECISION = 0.001
......@@ -237,7 +237,8 @@ Load timeseries data from file in the format preferred at low-level, either
"""
function getnativerepr end
# Untyped fallback: the file is read as time series.
getnativerepr(file) = gettimeseries(file)
# Chore and Trxmat files are read as time series.
getnativerepr(file::Chore) = gettimeseries(file)
getnativerepr(file::Trxmat) = gettimeseries(file)
# FIMTrack files are read as a run.
getnativerepr(file::FIMTrack) = getrun(file)
# note: labels files with no data dependencies may be broken
# A labels file delegates to its first data dependency, or falls back to its own run if
# it has no dependencies.
getnativerepr(file::JSONLabels) = isempty(file.dependencies) ? getrun(file) : getnativerepr(file.dependencies[1])
......@@ -630,4 +631,40 @@ function from_mwt(file::JSONLabels)
from_mwt(file.dependencies[1])
end
# asemptytracks(run_or_timeseries) = [Track(trackid, LarvaBase.times(trackdata))
# for (trackid, trackdata) in pairs(run_or_timeseries)]
#
# asemptytracks(file::PreloadedFile) = asemptytracks(getnativerepr(file))
"""
setdefaultlabel!(label_file_or_run, label_for_untagged_data)
Assign a label/tag to the unlabelled/untagged data.
All the data points defined in the data dependencies of the label file are considered, and
not only those defined in the label file.
"""
function setdefaultlabel!(run::Run, defaultlabel; attrname=(:labels, :names), filepath=nothing)
deps = Datasets.getdependencies(run, filepath)
fullrun = if isempty(deps)
run
else
dependency = preload(deps[1])
getrun(dependency)
end
Datasets.setdefaultlabel!(run, fullrun, defaultlabel; attrname=attrname)
end
# Label-file variant: the run of the first data dependency, if any, serves as the
# reference for the tracks and time steps to be labelled; otherwise the run of the
# label file itself does. The label file is returned.
function setdefaultlabel!(file::JSONLabels, defaultlabel; attrname=(:labels, :names))
    nodependencies = isempty(getdependencies!(file))
    reference = nodependencies ? getrun(file) : getrun(file.dependencies[1])
    Datasets.setdefaultlabel!(getrun(file), reference, defaultlabel; attrname=attrname)
    return file
end
end
......@@ -510,6 +510,30 @@ with metadata: OrderedDict{Symbol, Any} with 3 entries:
deserialized = Datasets.from_json_file(Run, json_file)
@test deserialized isa Run && haskey(deserialized.attributes, :labels)
timestamps = [66.675, 66.755, 66.856, 66.949, 67.039, 67.115, 67.196]
labelseries = ["crawl", "bend", "back", "stop", "undecided", "hunch", "roll"]
labelseries′ = fill("other", length(labelseries))
# first run: three tracks, all labelled "other", with a dictionary as label spec
labels = Run(runid,
             [Track(trackid-1, timestamps, Dict(:labels=>labelseries′)),
              Track(trackid, timestamps, Dict(:labels=>labelseries′)),
              Track(trackid+1, timestamps, Dict(:labels=>labelseries′)),
             ])
labels.attributes[:labels] = Dict(:names=>["other"], :colors=>["gray"])
# second run: the middle track only, with distinct labels
labels′ = Run(runid, [Track(trackid, timestamps, Dict(:labels=>labelseries))])
labels″ = mergelabels!(labels, labels′)
@test labels″ === labels && length(labels″.tracks) == 3
# the label spec is turned into a plain vector that lists old and new labels
@test labels.attributes[:labels] isa Vector{String}
@test issubset(Set(labelseries), Set(labels.attributes[:labels]))
@test "other" in labels.attributes[:labels]
# only the middle track is overwritten
@test labels[trackid-1][:labels] == labelseries′
@test labels[trackid][:labels] == labelseries
@test labels[trackid+1][:labels] == labelseries′
# add a track defined at the last time step only, then expand over the first run
labels′[trackid-1] = Track(trackid-1, [timestamps[end]], Dict(:labels=>["crawl"]))
labels‴ = Datasets.expand!(labels′, labels, Dict(:labels=>"new behavior"))
@test labels‴ === labels′ && length(labels‴.tracks) == 3
# `expand!` does not register the default label in the `:labels` attribute
@test labels′.attributes[:labels] == labelseries
@test labels′[trackid-1] == Track(trackid-1, timestamps,
    Dict(:labels=>["new behavior", "new behavior", "new behavior", "new behavior",
                   "new behavior", "new behavior", "crawl"]))
@test labels′[trackid] == Track(trackid, timestamps, Dict(:labels=>labelseries))
@test labels′[trackid+1] == Track(trackid+1, timestamps,
    Dict(:labels=>["new behavior", "new behavior", "new behavior", "new behavior",
                   "new behavior", "new behavior", "new behavior"]))
end
end
......@@ -709,6 +733,13 @@ radius_3(0),0""")
rm(path)
# load a labels file; precondition: four label names and three tracks (6, 3 and 2)
file = load(make_test_data("fimtrack_manual_labels"); framerate=30)
@assert file.run.attributes[:labels][:names] == ["collision", "run", "bend", "stop"] && collect(keys(file.run)) == UInt16[0x0006, 0x0003, 0x0002]
setdefaultlabel!(file, "new behavior")
# the label spec becomes a plain vector with the default label appended
@test file.run.attributes[:labels] == ["collision", "run", "bend", "stop", "new behavior"]
# the run now features tracks 0x0000 to 0x001f
@test collect(keys(file.run)) == UInt16[0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f]
# a track that was not in the labels file is entirely assigned the default label
@test all(label -> label == "new behavior", file.run[0x0000][:labels])
end
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment