diff --git a/Project.toml b/Project.toml
index 4de1265cbe11ceba8c2499f44e6ce6052e7634fe..53f88ab207220c6425c3da81e6a8bf1e65e83892 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PlanarLarvae"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
 authors = ["François Laurent", "Institut Pasteur"]
-version = "0.11.2"
+version = "0.12.0"
 
 [deps]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
diff --git a/docs/src/index.md b/docs/src/index.md
index 612175d1350390ebe93a7a3f0f7b451a2bd97839..e232ff484d406255e16c2b850c9afc83f2ba19a8 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -128,6 +128,9 @@ PlanarLarvae.Datasets.setdefaultlabel!
 ```@docs
 PlanarLarvae.Datasets.expand!
 ```
+```@docs
+PlanarLarvae.Datasets.segment
+```
 
 ```@docs
 PlanarLarvae.Formats.setdefaultlabel!
diff --git a/src/Dataloaders.jl b/src/Dataloaders.jl
index aa7ec88a1cac3d6adddbb511bd73458c053949db..687e968422c1f6bac2b56701ca6edd1c18efd19f 100644
--- a/src/Dataloaders.jl
+++ b/src/Dataloaders.jl
@@ -354,6 +354,7 @@ const TRXMAT_ACTION_MODIFIERS = Dict(:strong=>r"_strong$", :weak=>r"_weak$")
 function countthresholds(counts, selectors, majority_minority_ratio)
     totalcounts = total(counts)
     classcounts, classes = groupby(selectors, totalcounts)
+    isempty(classcounts) && @error "Not any specified label found" selectors totalcounts
     mincount, maxcount = countthresholds(classcounts, majority_minority_ratio)
     return mincount, maxcount, classcounts, classes
 end
@@ -361,7 +362,10 @@ end
 function countthresholds(counts, majority_minority_ratio)
     minoritylabel, mincount = first(counts)
     for (label, count) in pairs(counts)
-        if count < mincount
+        if count == 0
+            @warn "Label not found" label
+        elseif count < mincount || mincount == 0
+            # `mincount == 0` only if the first label is missing; do not keep it as minimum
             minoritylabel, mincount = label, count
         end
     end
@@ -477,7 +481,7 @@ function prioritylabel(label; verbose=true)
             if 0 < preincluded < inclusions
                 others = min(maxothers, upper) - inclusions
                 if verbose
-                    @info "Explicit inclusions based on label: $speciallabel" class count=(preincluded=>inclusions) others=(maxothers=>others)
+                    @info "Explicit inclusions (initially selected => eventually selected):" class=class priority_tag=speciallabel with_priority_tag=(preincluded=>inclusions) without_priority_tag=(maxothers=>others)
                 end
                 priorityratio = inclusions / maxinclusions
                 newratio = others / maxothers
diff --git a/src/Datasets.jl b/src/Datasets.jl
index 2f1d85c3200f2dfbfc29e674e3d17b73c2534850..c61033579bcd65fbfdc7eefe63cc3c51c98c2947 100644
--- a/src/Datasets.jl
+++ b/src/Datasets.jl
@@ -130,7 +130,7 @@ Base.:(==)(d1::Dataset, d2::Dataset) = d1.attributes == d2.attributes && d1.runs
 Base.:(==)(r1::Run, r2::Run) = r1.id == r2.id && r1.attributes == r2.attributes && r1.tracks == r2.tracks
 Base.:(==)(t1::Track, t2::Track) = t1.id == t2.id && t1.attributes == t2.attributes && t1.timestamps == t2.timestamps && t1.states == t2.states
 
-Base.isempty(track::Track) = isempty(track.states)
+Base.isempty(track::Track) = isempty(track.timestamps) || isempty(track.states)
 Base.isempty(run::Run) = isempty(run.tracks)
 Base.isempty(dataset::Dataset) = isempty(dataset.runs)
 
@@ -967,6 +967,56 @@ function labels_encoded(run::Run, recordname::Symbol=:labels)
     return true
 end
 
+"""
+    segment(track_run_or_dataset, t0, t1)
+    segment(outputfile, inputfile, t0, t1)
+
+Crop the timeseries to time segment `[t0, t1]` (both bounds are inclusive).
+
+The input is not modified; the returned object shares its attributes with the input.
+Tracks with no time steps in `[t0, t1]` are dropped, and so are runs left with no tracks.
+
+`inputfile` is a file path; `outputfile` is a filename. The output file is written in the
+same directory as the input file.
+"""
+function segment(track::Track, t0::Real, t1::Real)
+    I = @. t0 <= track.timestamps <= t1
+    timestamps = track.timestamps[I]
+    states = copy(track.states)
+    for (record, timeseries) in pairs(track.states)
+        states[record] = timeseries[I]
+    end
+    return Track(track.id, track.attributes, timestamps, states)
+end
+
+function segment(run::Run, t0, t1)
+    tracks = empty(run.tracks)
+    for (trackid, track) in pairs(run.tracks)
+        track = segment(track, t0, t1)
+        isempty(track) || (tracks[trackid] = track)
+    end
+    return Run(run.id, run.attributes, tracks)
+end
+
+function segment(dataset::Dataset, t0, t1)
+    runs = empty(dataset.runs)
+    for (runid, run) in pairs(dataset.runs)
+        run = segment(run, t0, t1)
+        isempty(run) || (runs[runid] = run)
+    end
+    return Dataset(dataset.attributes, runs)
+end
+
+function segment(::Type{T}, outfile::String, infile::String, t0, t1) where {T}
+    basename(outfile) == outfile || throw("Paths are not allowed for output file")
+    outfile = joinpath(dirname(infile), outfile)
+    run_or_dataset = from_json_file(T, infile)
+    run_or_dataset = segment(run_or_dataset, t0, t1)
+    to_json_file(outfile, run_or_dataset)
+end
+
+segment(outfile::String, infile::String, t0, t1) = segment(Run, outfile, infile, t0, t1)
+
 """
     expand!(run, run′, defaultstate)
 
diff --git a/src/Formats.jl b/src/Formats.jl
index 81834c20c7ed36c194fa5f0e9c901b6c97dc4b0f..5bf1da2d9b65184e00a0c2d2911738ff0c36c53c 100644
--- a/src/Formats.jl
+++ b/src/Formats.jl
@@ -754,10 +754,33 @@ function normalize_timeseries_timestamps(file, ts=nothing; digits=4)
         end
     else
         for (trackid, track) in pairs(timeseries)
+            if trackid ∉ keys(ts)
+                @debug "Skipping track #$trackid"
+                continue
+            end
             ts′= ts[trackid]
-            length(track) == length(ts′) || throw("unequal numbers of time steps")
-            track = [(t, v) for (t, (_, v)) in zip(ts′, track)]
-            timeseries[trackid] = track
+            if length(track) != length(ts′)
+                @debug "Unequal numbers of time steps"
+                # we assume `ts` is a segment (successive timestamps in `track`)
+                isempty(ts′) && throw("Cannot find contiguous timestamps")
+                n = length(track)
+                i = searchsortedfirst(LarvaBase.times(track), ts′[1])
+                # rounding up may have made `ts′[1]` greater than the original timestamp,
+                # in which case `i` is one step too far, possibly past the end of `track`
+                if 1 < i && (n < i || round(track[i][1]; digits=digits) != ts′[1])
+                    i -= 1
+                end
+                j = i + length(ts′) - 1
+                if j <= n && round(track[i][1]; digits=digits) == ts′[1] &&
+                        round(track[j][1]; digits=digits) == ts′[end]
+                    track = track[i:j]
+                else
+                    # clamp the diagnostic windows so that logging cannot raise a `BoundsError`
+                    @debug "Cannot find segment ends in extended timeseries" t0=[t for (t, _) in track[max(i-1, 1):min(i+1, n)]] t0′=ts′[1] t1=[t for (t, _) in track[max(j-1, 1):min(j+1, n)]] t1′=ts′[end]
+                    throw("Cannot find contiguous timestamps")
+                end
+            end
+            timeseries[trackid] = [(t, v) for (t, (_, v)) in zip(ts′, track)]
         end
     end
     return timeseries
diff --git a/test/runtests.jl b/test/runtests.jl
index 5b37197ad9d35a7823b94c2b19eb703274fb9535..c47f28ee42b29e5dad0d5a5ec3115ad995202e6f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -583,6 +583,21 @@ with metadata: OrderedDict{Symbol, Any} with 3 entries:
     delete!(dataset.attributes, :dependencies)
     @test !shareddependencies(dataset, dataset′)
 
+    dataset = Dataset([Run(runid,
+                           [Track(1,
+                                  Datasets.Timestamp[56.2, 56.3, 56.4],
+                                  :record=>[1, 2, 3]),
+                            Track(2,
+                                  Datasets.Timestamp[59.8, 60, 60.2, 61.8, 62, 62.2],
+                                  :record=>[1, 2, 3, 4, 5, 6])])])
+    cropped_dataset = Datasets.segment(dataset, 60, 62)
+    cropped_run = first(values(cropped_dataset))
+    cropped_track = first(values(cropped_run))
+    @test length(cropped_dataset) == 1 && length(cropped_run) == 1
+    expected_track = Track(2, Datasets.Timestamp[60, 60.2, 61.8, 62], :record=>[2, 3, 4, 5])
+    @test cropped_track == expected_track
+    @test isempty(Datasets.segment(dataset, 0, 1))
+
 end
 
 end