diff --git a/Project.toml b/Project.toml index 7002ab52a2c0beba001d16fbfc8e0cf4e2a589c1..ca5795b8266550c0da2ecf8b8ad7b94cfd6e39c7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PlanarLarvae" uuid = "c2615984-ef14-4d40-b148-916c85b43307" authors = ["François Laurent", "Institut Pasteur"] -version = "0.13" +version = "0.14" [deps] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" diff --git a/docs/src/index.md b/docs/src/index.md index e232ff484d406255e16c2b850c9afc83f2ba19a8..289983d8fea3ad23e1dae54674a7ad15f5c978b2 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -253,6 +253,53 @@ larvatrack derivedtype ``` +# Data loaders + +```@docs +PlanarLarvae.Dataloaders +``` + +```@docs +PlanarLarvae.Dataloaders.Repository +``` + +```@docs +PlanarLarvae.Dataloaders.TimeWindow +``` + +```@docs +PlanarLarvae.Dataloaders.TimeSegment +``` + +```@docs +PlanarLarvae.Dataloaders.LazyIndex +``` + +```@docs +PlanarLarvae.Dataloaders.DataLoader +``` + +```@docs +PlanarLarvae.Dataloaders.ratiobasedsampling +``` + +```@docs +PlanarLarvae.Dataloaders.capacitysampling +``` + +```@docs +PlanarLarvae.Dataloaders.buildindex +``` + +```@docs +PlanarLarvae.Dataloaders.samplesize! +``` + +```@docs +PlanarLarvae.Dataloaders.sample +``` + + # Index ```@index diff --git a/src/Dataloaders.jl b/src/Dataloaders.jl index ade162ea5efa3fc009a2742caebfca831813d68a..8edcde7951e1667881d5d289a0900c342f012240 100644 --- a/src/Dataloaders.jl +++ b/src/Dataloaders.jl @@ -17,7 +17,7 @@ using ..MWT: interpolate using Random export DataLoader, Repository, TimeWindow, ratiobasedsampling, buildindex, sample, - extendedlength, prioritylabel, capacitysampling + extendedlength, prioritylabel, capacitysampling, samplesize! 
""" The default window type defines a time segment as the time of a segment-specific reference @@ -61,6 +61,16 @@ function extendedlength(w::TimeWindow) round(Int, extendedduration * w.samplerate) + 1 end +""" + TimeSegment(trackid, anchortime, window, class, timeseries) + +Time segment representation that includes: +* the ID of the originating track (attribute `trackid`), +* the anchor time (*i.e.* the central timestamp, attribute `anchortime`), +* the time window used to generate the time segment (attribute `window`), +* the behavior label or label class (attribute `class`), +* and the timeseries data (attribute `timeseries`). +""" struct TimeSegment trackid anchortime @@ -117,6 +127,23 @@ function segment(file, window, trackid, step, class) TimeSegment(trackid, anchortime, window, class, segmentdata) end +""" + Repository(rootpath) + Repository(rootpath, pattern; basename_only=false) + Repository(rootpath, fileselector) + +Data repository. Data file provider. + +Files are recursively sought for from root directory `rootpath`. + +Regular expression `pattern` can be used to control what files are included based on their +relative path or filename (if `basename_only=true`). + +Instead, as a more general control mechanism, boolean function `fileselector` takes a +[`PreloadedFile`](@ref Main.PlanarLarvae.Formats.PreloadedFile) as input argument. +See also [`labelledfiles`](@ref Main.PlanarLarvae.Formats.labelledfiles), argument +`selection_rule`. 
+""" struct Repository root::String files::Vector{Formats.PreloadedFile} @@ -132,7 +159,7 @@ function Repository(root::String, pattern::Regex; basename_only::Bool=false) file″= try preload(file; shallow=true) catch - @warn "Cannot read labels from file" file + @warn "Cannot preload file" file continue end push!(files, file″) @@ -172,6 +199,12 @@ root(repo::Repository) = repo.root files(repo::Repository) = repo.files filepaths(repo) = [relpath(file.source, root(repo)) for file in files(repo)] +""" + DataLoader(repository, window, index) + +Data loader as a simple combination of a datafile repository, a time window and an index of +time segments. +""" struct DataLoader repository window @@ -181,13 +214,13 @@ end root(loader::DataLoader) = root(loader.repository) files(loader::DataLoader) = files(loader.repository) -function countlabels(loader::DataLoader; unload=false) - countlabels(loader.repository, loader.window; unload=unload) +function countlabels(loader::DataLoader; kwargs...) + countlabels(loader.repository, loader.window; kwargs...) end const Count = Dict{Union{String, Vector{String}}, Int} -function countlabels(repository::Repository, window; unload=false) +function countlabels(repository::Repository, window; unload=false, skiperrors=false) counts = Dict{Formats.PreloadedFile, Count}() ch = Channel() do ch foreach(files(repository)) do file @@ -196,7 +229,23 @@ function countlabels(repository::Repository, window; unload=false) end c = Threads.Condition() Threads.foreach(ch) do file - counts′= countlabels(Formats.getnativerepr(file), window) + data = try + Formats.getnativerepr(file) + catch + @error "Failed to load file" file=file.source + if skiperrors + return + else + rethrow() + end + end + counts′= try + countlabels(data, window) + catch + # "missing :tags field in NamedTuple" results from `drop_record!` in `sample` + @error "Failed to load labels; are you reusing a DataLoader?" 
file=file.source + rethrow() + end unload && unload!(file; gc=true) lock(c) try @@ -248,6 +297,16 @@ end total(counts::Dict{<:Any, Int}) = counts +""" + LazyIndex(sampler) + LazyIndex(maxcounts, targetcounts, sampler) + +Index of time segments. + +After calling [`buildindex`](@ref), attributes `maxcounts` and `targetcounts` are defined. + +See also [`ratiobasedsampling`](@ref) and [`capacitysampling`](@ref). +""" mutable struct LazyIndex maxcounts targetcounts @@ -266,35 +325,71 @@ abstract type RatioBasedSampling end struct ClassRatios <: RatioBasedSampling selectors majority_minority_ratio - seed + rng end struct IntraClassRatios <: RatioBasedSampling selectors majority_minority_ratio intraclass - seed + rng end function withselectors(sampler::T, selectors) where {T} T((field === :selectors ? selectors : getfield(sampler, field) for field in fieldnames(T))...) end -function ratiobasedsampling(selectors, majority_minority_ratio; seed=nothing) - LazyIndex(ClassRatios(asselectors(selectors), majority_minority_ratio, seed)) +""" + ratiobasedsampling(selectors, majority_minority_ratio; seed=nothing, rng=nothing) + ratiobasedsampling(selectors, majority_minority_ratio, intraclass; ...) + +Important! Returns a [`LazyIndex`](@ref) object. + +Sample the labels considering two categories of classes: majority classes and minority +classes. +Minority classes share a same sample size (per class), aligned with the least represented +class. +Majority classes share another sample size, per default twice as large as that of minority +classes. +Argument `majority_minority_ratio` defines the sample size ratio between majority and +minority classes. + +Classes are defined following arguments `selectors`. Per default, each label represents a +class. +When some observations come with multiple labels, one of these labels can be specified with +argument `intraclass` so that the observations with this label are systematically included. 
+""" +function ratiobasedsampling(selectors, majority_minority_ratio; seed=nothing, rng=nothing) + if !isnothing(seed) + if isnothing(rng) + rng = Random.default_rng() + end + Random.seed!(rng, seed) + end + LazyIndex(ClassRatios(asselectors(selectors), majority_minority_ratio, rng)) end -function ratiobasedsampling(selectors, majority_minority_ratio, intraclass; seed=nothing) +function ratiobasedsampling(selectors, majority_minority_ratio, intraclass; + seed=nothing, rng=nothing, +) + if !isnothing(seed) + if isnothing(rng) + rng = Random.default_rng() + end + Random.seed!(rng, seed) + end intraclass = if intraclass isa Pair Dict(asselector(intraclass.first) => intraclass.second) else Dict(asselector(selector) => f for (selector, f) in intraclass) end LazyIndex(IntraClassRatios(asselectors(selectors), majority_minority_ratio, intraclass, - seed)) + rng)) end -init!(sampler) = isnothing(sampler.seed) || Random.seed!(sampler.seed) +function init!(_) + @warn "init! is deprecated; calls to init! can safely be removed" +end classtype(sampler::RatioBasedSampling) = classtype(sampler.selectors) classtype(::AbstractDict{T, <:Any}) where {T} = T @@ -408,11 +503,25 @@ function countthresholds(sampler::RatioBasedSampling, counts) countthresholds(counts, sampler.selectors, sampler.majority_minority_ratio) end +""" + buildindex(loader; unload=false, verbose=true, skiperrors=false) + buildindex(index, repository, window; ...) + +Build an index of time segments in a data repository, organized per label class. + +`unload=true` frees memory early; timeseries data are unallocated as soon as consumed. + +`verbose=true` prints a summary of time segment counts per label class. + +`skiperrors=true` skips data files (typically *trx.mat* files) that cannot be loaded. +""" function buildindex(loader::DataLoader; kwargs...) buildindex(loader.index, loader.repository, loader.window; kwargs...) 
end -function buildindex(ix::LazyIndex, repository, window; unload=false, verbose=true) +function buildindex(ix::LazyIndex, repository, window; + unload=false, verbose=true, skiperrors=false, +) sampler = ix.sampler if hasproperty(sampler, :selectors) && isnothing(sampler.selectors) anyfile = first(files(repository)) @@ -423,7 +532,7 @@ function buildindex(ix::LazyIndex, repository, window; unload=false, verbose=tru ix.sampler = sampler = withselectors(sampler, asselectors(labels)) end # - ix.maxcounts = countlabels(repository, window; unload=unload) + ix.maxcounts = countlabels(repository, window; unload=unload, skiperrors=skiperrors) ix.targetcounts = buildindex(sampler, ix) # if verbose @@ -536,6 +645,21 @@ function prioritylabel(label; verbose=true) return selector => priority_include end +""" + sample(f, loader, features=:spine) + +Sample time segments and apply function `f` on the segments from each file. + +Function `f` takes the following arguments: +* file index (`Int`), +* file object ([`PreloadedFile`](@ref Main.PlanarLarvae.Formats.PreloadedFile), +* couple of: + * cumulated count of time segments prior to current file (`Int`), + * expected count of time segments for current file (`Int`), +* list of time segments (`Vector{TimeSegment}`). + +See also [`TimeSegment`](@ref). +""" function sample(f, loader::DataLoader, features=:spine; kwargs...) ch = Channel() do ch state = nothing @@ -553,7 +677,6 @@ function sample(f, loader::DataLoader, features=:spine; kwargs...) end function sample(file::Formats.PreloadedFile, window, ix::LazyIndex, features; kwargs...) - init!(ix.sampler) sample(ix.sampler, file, window, ix.targetcounts[file], features; kwargs...) end @@ -581,10 +704,12 @@ function sample(sampler, file, window, counts, features; verbose=false) @info "In file: $(file.source)\nSample sizes (observed => selected):" [(label isa Vector ? 
Symbol(Symbol.(label)) : Symbol(label)) => (count => get(counts, label, 0)) for (label, count) in pairs(observedcounts)]... end + rng = isnothing(sampler.rng) ? Random.default_rng() : sampler.rng + # pick time segments at random to achieve the desired class counts for (label, count) in pairs(counts) label in keys(index) || continue - pointers = shuffle(index[label]) + pointers = shuffle(rng, index[label]) # counts should not exceed the actual numbers index[label] = pointers[1:count] end @@ -642,15 +767,30 @@ presample(_, cumulatedcount, _, _, counts) = (sum(cumulatedcount), sum(values(co struct CapacitySampling <: RatioBasedSampling selectors maxcount::Integer - seed + rng end -function capacitysampling(selectors, maxcount::Integer; seed=nothing) - LazyIndex(CapacitySampling(asselectors(selectors), maxcount, seed)) +""" + capacitysampling(maxcount; seed=nothing, rng=nothing) + capacitysampling(selectors, maxcount; ...) + +Important! Returns a [`LazyIndex`](@ref) object. + +Sample each class up to a maximum count `maxcount`. +Class definition can be controlled with argument `selectors`. +""" +function capacitysampling(selectors, maxcount::Integer; seed=nothing, rng=nothing) + if !isnothing(seed) + if isnothing(rng) + rng = Random.default_rng() + end + Random.seed!(rng, seed) + end + LazyIndex(CapacitySampling(asselectors(selectors), maxcount, rng)) end -function capacitysampling(maxcount::Integer; seed=nothing) - capacitysampling(nothing, maxcount; seed=seed) +function capacitysampling(maxcount::Integer; kwargs...) + capacitysampling(nothing, maxcount; kwargs...) end function ratio(sampler::CapacitySampling, counts) @@ -659,4 +799,79 @@ function ratio(sampler::CapacitySampling, counts) return ratios end +""" + samplesize!(index, n) + +Adjust target counts so that the total sample size is `n`. +This should be performed after [`buildindex`](@ref) and before [`sample`](@ref). + +See also [`capacitysampling`](@ref) for per-class control of sample size. 
+"""
+function samplesize!(index, sample_size)
+    # Explicit checks rather than `@assert`: assertions are meant for internal
+    # invariants and may be disabled, which would let invalid input through.
+    isnothing(index.targetcounts) &&
+        throw(ArgumentError("buildindex must be called before samplesize!"))
+    sample_size >= 0 ||
+        throw(ArgumentError("sample_size must be non-negative; got $(sample_size)"))
+
+    # The sample can only be shrunk; a request at or above the current size is a no-op.
+    total_sample_size = length(index)
+    sample_size < total_sample_size || return index
+
+    # Named `scale` so that the module-level `ratio` function is not shadowed.
+    scale = sample_size / total_sample_size
+
+    rng = isnothing(index.sampler.rng) ? Random.default_rng() : index.sampler.rng
+
+    # apply `scale` to the total counts first
+    maxcounts = total(index.maxcounts)
+    targetcounts = total(index.targetcounts)
+    for (label, count) in pairs(targetcounts)
+        targetcounts[label] = round(Int, count * scale)
+    end
+    # Rounding can leave the grand total slightly off; nudge randomly ordered labels
+    # by one unit, staying within [0, maxcounts[label]], until the total matches.
+    totalcount = sum(values(targetcounts))
+    if totalcount < sample_size
+        for label in shuffle(rng, collect(keys(targetcounts)))
+            if targetcounts[label] < maxcounts[label]
+                targetcounts[label] += 1
+                totalcount += 1
+                totalcount == sample_size && break
+            end
+        end
+    elseif sample_size < totalcount
+        for label in shuffle(rng, collect(keys(targetcounts)))
+            if 0 < targetcounts[label]
+                targetcounts[label] -= 1
+                totalcount -= 1
+                totalcount == sample_size && break
+            end
+        end
+    end
+
+    # apply `scale` at the per-file level
+    # `copy` on the outer dictionary is shallow, so the per-file dictionaries are
+    # copied as well; `index` is left untouched until the new counts are complete.
+    targetcountsperfile = copy(index.targetcounts)
+    filelist = collect(keys(targetcountsperfile)) # hoisted: the key set never changes
+    for file in filelist
+        targetcountsperfile[file] = copy(targetcountsperfile[file])
+    end
+    for counts in values(targetcountsperfile)
+        for (label, count) in pairs(counts)
+            counts[label] = round(Int, count * scale)
+        end
+    end
+
+    # number of time segments with `label` available in `file` (0 if not listed)
+    available(file, label) = get(index.maxcounts[file], label, 0)
+
+    # reconcile the per-file counts with the per-label totals computed above
+    perfiletotals = total(targetcountsperfile)
+    for (label, targetcount) in pairs(targetcounts)
+        count = perfiletotals[label]
+        if count < targetcount
+            # The loop below draws files at random until one has room left; make sure
+            # the target is reachable, otherwise that loop would never terminate.
+            capacity = 0
+            for file in filelist
+                if haskey(targetcountsperfile[file], label)
+                    capacity += available(file, label)
+                end
+            end
+            if capacity < targetcount
+                @warn "Not enough time segments to reach the target count" label targetcount capacity
+                targetcount = capacity
+            end
+        end
+        while count < targetcount
+            file = rand(rng, filelist)
+            counts = targetcountsperfile[file]
+            if haskey(counts, label) && counts[label] < available(file, label)
+                counts[label] += 1
+                count += 1
+            end
+        end
+        while targetcount < count
+            file = rand(rng, filelist)
+            counts = targetcountsperfile[file]
+            if 0 < get(counts, label, 0)
+                counts[label] -= 1
+                count -= 1
+            end
+        end
+    end
+
+    index.targetcounts = targetcountsperfile
+    return index
+end
+
 end
diff --git a/src/MWT.jl
b/src/MWT.jl index 2fb6a7ba4ff5c57fd65a658ca637e2b785b40de7..39e9c173e364d859a49fea9cc666cc7ad685a06a 100644 --- a/src/MWT.jl +++ b/src/MWT.jl @@ -76,7 +76,7 @@ interpolate(x1::T, x2::T, lambda) where {T<:AbstractFloat} = (1 - lambda) * x1 + interpolate(x1::T, x2::T, lambda, - ) where {T<:AbstractArray} = @. (1 - lambda) * x1 + lambda * x2 + ) where {T<:AbstractArray{<:AbstractFloat}} = @. (1 - lambda) * x1 + lambda * x2 function interpolate(geometry1::T, geometry2::T, lambda) where {T<:Larva.SpineGeometry} x1, y1 = collect.(zip(coordinates.(Larva.vertices′(geometry1))...)) x2, y2 = collect.(zip(coordinates.(Larva.vertices′(geometry2))...)) @@ -84,7 +84,8 @@ function interpolate(geometry1::T, geometry2::T, lambda) where {T<:Larva.SpineGe Larva.PointSeries(interpolate(x1, x2, lambda), interpolate(y1, y2, lambda))) end -function interpolate(tags1::T, tags2::T, lambda) where {T<:Larva.AbstractTags} +function interpolate(tags1, tags2, lambda) + # default nearest-neighbor implementation for tags/labels lambda <= 0.5 ? tags1 : tags2 end function interpolate(state1::T, state2::T, lambda) where {T<:NamedTuple} diff --git a/test/runtests.jl b/test/runtests.jl index ae75658551b42f80bcecc35ca9fbddb4a9e21dda..c5404c77383331aa0acca4980248322cb218fd74 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -915,7 +915,7 @@ if all_tests || "Dataloaders" in ARGS # @test_warn exists but does not catch warnings... 
Repository(joinpath(dir, "**", "trx.mat")) else - @test_warn "Cannot read labels from file" Repository(joinpath(dir, "**", "trx.mat")) + @test_warn "Cannot preload file" Repository(joinpath(dir, "**", "trx.mat")) end @test length(repo4) == 1 && basename(dirname(Dataloaders.filepaths(repo4)[1])) == "20140918_170215" # @@ -923,7 +923,7 @@ if all_tests || "Dataloaders" in ARGS index4 = capacitysampling(60; seed=347980) loader4 = DataLoader(repo4, window4, index4) buildindex(loader4; verbose=false) - @test index4.sampler.maxcount == 60 && index4.sampler.seed == 347980 + @test index4.sampler.maxcount == 60# && index4.sampler.seed == 347980 @test Set(keys(index4.sampler.selectors)) == Set([:back, :back_large, :back_strong, :back_weak, :cast, :cast_large, :cast_strong, :cast_weak, :hunch, :hunch_large, :hunch_strong, :hunch_weak, :roll, :roll_large, :roll_strong, :roll_weak, :run, :run_large, :run_strong, :run_weak, :stop, :stop_large, :stop_strong, :stop_weak, :small_motion]) counts4 = copy(first(values(index4.targetcounts))) counts4′= pop!(counts4, ["back", "back_weak", "small_motion"]) @@ -938,8 +938,22 @@ if all_tests || "Dataloaders" in ARGS @test lengths == if VERSION < v"1.7" [66, 66, 65, 66, 66, 66, 66, 66, 65, 65, 66, 65, 66, 66, 66, 66, 66, 65, 65, 66, 66, 66, 66, 65, 65, 66, 65, 66, 63, 64, 65, 67, 65, 66, 65, 65, 66, 66, 65, 66, 65, 65, 63, 63, 63, 66, 66, 65, 66, 66, 65, 65, 65, 66, 66, 66, 66, 65, 66, 66, 66, 66, 65, 65, 66, 66, 66, 65, 65, 65, 65, 63, 63, 65, 66, 66, 66, 66, 66, 65, 66, 66, 65, 65, 65, 65, 65, 66, 66, 66, 66, 63, 63, 65, 65, 65, 65, 63, 66, 67, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 65, 65, 65, 66, 65, 66, 66, 66, 65, 65, 65, 65, 65, 66, 65, 63, 63, 63, 63, 63, 66, 65, 66, 65, 66, 64, 65, 66, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 63, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 63, 64, 63, 65, 65, 66, 66, 66, 66, 
66, 66, 66, 66, 66, 65, 64, 65, 64, 65, 66, 66, 65, 65, 66, 66, 66, 65, 65, 66, 66, 66, 66, 66, 65, 66, 66, 66, 65, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 66, 66, 66, 65, 65, 66, 66, 66, 66, 65, 65, 66, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 65, 65, 65, 65, 65, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 65, 66, 66, 65, 65, 65, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 63, 63, 63, 63, 65, 63, 63, 64, 64, 64, 64, 64, 66, 66, 66, 65, 65, 65, 66, 66, 66, 66, 66, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 66, 66, 66, 65, 65, 65, 66, 65, 66, 65, 65, 65, 65, 65, 65, 65, 66, 65, 65, 66, 66, 65, 64, 64, 64, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 63, 63, 66, 65, 63, 66, 66, 66, 66, 65, 66, 66, 66, 66, 66, 66, 66, 65, 65, 66, 66, 66, 66, 65, 63, 64, 64, 65, 64, 64, 65, 66, 66, 66, 66, 65, 66, 66, 66, 63, 63, 63, 64, 64, 64, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 63, 63, 64, 64, 66, 64, 64, 65, 65, 63, 65, 64, 64, 66, 65, 64, 64, 63] else - [66, 66, 66, 65, 66, 65, 66, 66, 66, 66, 66, 65, 66, 65, 66, 66, 66, 66, 65, 66, 65, 65, 66, 65, 66, 65, 66, 62, 63, 64, 66, 65, 66, 65, 65, 65, 67, 65, 66, 64, 65, 66, 66, 66, 66, 66, 66, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 66, 66, 66, 65, 66, 65, 66, 65, 63, 63, 63, 66, 66, 66, 66, 65, 65, 65, 66, 65, 66, 66, 66, 66, 65, 66, 66, 65, 66, 66, 66, 65, 65, 64, 64, 63, 63, 64, 65, 65, 65, 65, 63, 63, 63, 63, 63, 63, 66, 65, 66, 65, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 65, 66, 66, 65, 65, 66, 65, 65, 66, 65, 65, 65, 65, 66, 64, 64, 64, 63, 66, 66, 66, 63, 62, 65, 65, 66, 65, 65, 66, 64, 64, 64, 65, 63, 63, 63, 63, 63, 63, 64, 63, 65, 66, 66, 65, 66, 65, 66, 65, 66, 65, 65, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 65, 65, 64, 63, 63, 63, 63, 62, 63, 63, 
64, 64, 64, 64, 65, 66, 66, 66, 66, 66, 65, 66, 66, 66, 66, 66, 65, 66, 66, 66, 67, 65, 65, 65, 64, 63, 66, 65, 66, 66, 66, 65, 66, 66, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 62, 66, 65, 66, 67, 66, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 65, 66, 66, 66, 65, 65, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 63, 64, 64, 63, 62, 64, 64, 64, 64, 64, 64, 66, 66, 65, 66, 65, 66, 65, 65, 65, 66, 65, 63, 63, 63, 63, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65, 65, 66, 66, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 63, 63, 64, 65, 65, 64, 64, 64, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 64, 66, 66, 66, 65, 65, 66, 66, 65, 66, 66, 66, 65, 66, 66, 65, 65, 65, 66, 66, 66, 66, 66, 63, 63, 65, 66, 66, 63, 63, 63, 64, 64, 64, 64, 65, 65, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 65, 64, 63, 63, 64, 66, 63, 64, 64, 64, 64, 63, 65, 65, 63, 63, 64] + [65, 65, 66, 64, 66, 66, 67, 66, 66, 66, 65, 66, 66, 65, 66, 65, 65, 66, 66, 65, 66, 66, 65, 65, 66, 66, 66, 65, 65, 66, 66, 66, 66, 66, 65, 65, 66, 64, 64, 66, 66, 66, 66, 66, 66, 65, 66, 65, 64, 63, 63, 64, 65, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 66, 66, 65, 65, 64, 63, 63, 63, 62, 64, 66, 66, 66, 66, 66, 66, 65, 66, 66, 66, 65, 66, 66, 66, 65, 66, 65, 66, 64, 64, 63, 65, 65, 65, 64, 63, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 65, 66, 65, 66, 66, 66, 66, 65, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 63, 63, 66, 66, 66, 66, 66, 65, 65, 66, 66, 64, 63, 63, 64, 66, 65, 65, 63, 64, 64, 64, 63, 63, 63, 64, 64, 65, 66, 65, 65, 65, 64, 66, 65, 65, 66, 65, 66, 66, 65, 65, 65, 66, 65, 65, 65, 65, 65, 66, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 
65, 66, 65, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 63, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 64, 65, 65, 67, 65, 65, 63, 63, 63, 62, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 64, 66, 65, 66, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 65, 65, 66, 65, 65, 65, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 64, 64, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 63, 66, 66, 66, 66, 65, 66, 65, 65, 65, 65, 65, 66, 66, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 63, 65, 66, 65, 66, 66, 66, 66, 65, 65, 66, 65, 65, 64, 63, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 63, 63, 64, 64, 64, 65, 65, 64, 65, 63, 63, 64, 63, 66, 66, 65, 65, 66, 64, 65, 65, 64, 66, 66, 66, 65, 66, 66, 65, 66, 66, 65, 65, 68, 66, 66, 65, 66, 66, 66, 63, 66, 66, 66, 66, 66, 63, 63, 63, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65, 66, 64, 65, 64, 63] end + # + repo5 = Repository(run1) + window5 = TimeWindow(1.5, 10) + index5 = ratiobasedsampling(["back", "bend", "hunch", "run", "stop", "small_action"], 1.5; seed=347980) + loader5 = DataLoader(repo5, window5, index5) + buildindex(loader5; verbose=false) + original_sample_size = length(loader5.index) + samplesize!(loader5.index, 19) + @test original_sample_size == 121 && length(loader5.index) == 19 + lengths′= nothing # set scope + Dataloaders.sample(loader5, :spine) do _, _, _, segments + lengths′= [length(segment.timeseries) for segment in segments] + end + @test all(==(16), lengths′) && length(lengths′) == 19 end end