diff --git a/Artifacts.toml b/Artifacts.toml
index 7b55130b7fbb5f553a1e90812056b21ae0e55992..bcd215e21543d099731a5a365558f9c2544c5c5f 100644
--- a/Artifacts.toml
+++ b/Artifacts.toml
@@ -51,5 +51,5 @@ git-tree-sha1 = "1dec5e1c33044d972466d03ca5b8816b9861eb5f"
 lazy = true
 
   [[sample_training_dataset.download]]
-    url = "https://dl.pasteur.fr/fop/Wa2niN3o/sample_training_dataset.tgz"
+    url = "https://dl.pasteur.fr/fop/DKgeHrAl/sample_training_dataset.tgz"
     sha256 = "5f1fc870185252a6c4d79ce113c6d98e30107610ee11029642e2e72b7d62c1a0"
diff --git a/Project.toml b/Project.toml
index ca5795b8266550c0da2ecf8b8ad7b94cfd6e39c7..2d1b72ce8e89e8dff6973700501cb4a4ac3910b7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PlanarLarvae"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
 authors = ["François Laurent", "Institut Pasteur"]
-version = "0.14"
+version = "0.15"
 
 [deps]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
diff --git a/src/Datasets.jl b/src/Datasets.jl
index c61033579bcd65fbfdc7eefe63cc3c51c98c2947..f60ba059c93f5f81144b865b1532232dd17cd1e9 100644
--- a/src/Datasets.jl
+++ b/src/Datasets.jl
@@ -11,7 +11,7 @@ export Dataset, Run, Track, extract_metadata_from_filepath, sort_metadata,
        encodelabels, encodelabels!, decodelabels, decodelabels!, mergelabels!, appendlabel!,
        shareddependencies #, pushdependency!, getdependencies, setdefaultlabel!
 
-const Attributes = AbstractDict{Symbol, Any}
+const Attributes = AbstractDict{Symbol}
 const ConcreteAttributes = Dict{Symbol, Any} # ConcreteAttributes() as default argument value
 const Dict′ = OrderedDict{Symbol, Any}
 
diff --git a/src/Formats.jl b/src/Formats.jl
index 5bf1da2d9b65184e00a0c2d2911738ff0c36c53c..0993c935d8238023b9d87497358934aaf86f6b26 100644
--- a/src/Formats.jl
+++ b/src/Formats.jl
@@ -12,6 +12,7 @@ using ..LarvaBase: LarvaBase, Spine, Outline, BehaviorTags, derivedtype
 using ..Chore: read_chore_files, parse_filename, find_chore_files
 using ..Trxmat: read_trxmat, checktrxmat
 using ..FIMTrack: read_fimtrack
+using ..MaggotUBA: read_larva_dataset_hdf5
 using ..Datasets
 using OrderedCollections: OrderedDict
 
@@ -57,6 +58,12 @@ mutable struct JSONLabels <: PreloadedFile
     run::Run
     dependencies::Vector{PreloadedFile}
 end
+mutable struct MaggotUBA <: PreloadedFile
+    source::String # path of the file; read by `load!`
+    capabilities # the constructor passes (:spine=>Spine, :tags=>BehaviorTags)
+    timeseries::LarvaBase.Larvae
+    run::Run # assigned by `load!` from the content of `source`
+end
 
 larvafile(T, path, capabilities, args...) = T(path,
                                               capabilities,
@@ -73,6 +80,7 @@ FIMTrack(path::String; framerate=1, pixelsize=nothing, overrides=nothing
         ) = larvafile(FIMTrack, path, spine_outline, framerate, pixelsize,
                       isnothing(overrides) ? Dict{Symbol, Any}() : overrides)
 JSONLabels(path::String) = larvafile(JSONLabels, path, spine_outline_tags, PreloadedFile[])
+MaggotUBA(path::String) = larvafile(MaggotUBA, path, (:spine=>Spine, :tags=>BehaviorTags))
 
 """
     guessfileformat(filepath; fail=false, shallow=false)
@@ -97,6 +105,8 @@ function guessfileformat(path::String; fail::Bool=false, shallow::Bool=false)
         head(r"^\{\s*\"metadata\":\s*{\s*\"id\":", path, ',') && return JSONLabels
     elseif ext in fimtrack_ext
         head(r"^,larva\([0-9]$", path, 8) && return FIMTrack
+    elseif ext in maggotuba_ext
+        startswith(basename(path), "larva_dataset_") && return MaggotUBA
     end
     fail && throw("Cannot determine format for file: $path")
 end
@@ -105,6 +115,7 @@ const chore_ext = (".outline",".spine")
 const trxmat_ext = (".mat",)
 const fimtrack_ext = (".csv",)
 const labels_ext = (".json",".label",".labels",".nyxlabel")
+const maggotuba_ext = (".hdf5",)
 
 head(pattern::Regex, s::IOStream, n) = head(s, n) do s
     !isnothing(match(pattern, s))
@@ -210,7 +221,7 @@ function getlabels(file::Trxmat; fail=false)
     end
     return Dict(:names=>file.labels)
 end
-function getlabels(file::JSONLabels; fail=false)
+function getlabels(file::Union{JSONLabels, MaggotUBA}; fail=false)
     isempty(file.run) && load!(file)
     #labels = file.run.attributes[:labels]
     _, recordname = Datasets.getlabels(file.run)
@@ -243,6 +254,7 @@ getnativerepr(file::Trxmat) = gettimeseries(file)
 getnativerepr(file::FIMTrack) = getrun(file)
 # note: labels files with no data dependencies may be broken
 getnativerepr(file::JSONLabels) = isempty(file.dependencies) ? getrun(file) : getnativerepr(file.dependencies[1])
+getnativerepr(file::MaggotUBA) = getrun(file) # same as for FIMTrack files
 
 """
     gettimeseries(preloadedfile)
@@ -340,7 +352,8 @@ function astimeseries(track::Track)
     return timeseries
 end
 
-function astimeseries(run::Run)
+function astimeseries(run::Run; labels2tags=false)
+    labels2tags && return astimeseries(Formats.labels2tags(run))
     track = first(values(run.tracks))
     firststate = asnamedtuple(track.states, 1)
     T = typeof(firststate)
@@ -421,6 +434,34 @@ function appendtags(timeseries, run)
     return newtimeseries
 end
 
+"""
+    labels2tags(run)
+
+Make a copy with labels converted into behavior tags.
+"""
+function labels2tags(run::Run)
+    labelnames, labelrecord = Datasets.getlabels(run)
+    # make sure the label names are available as symbols
+    labelnames isa AbstractDict && (labelnames = labelnames[:names])
+    eltype(labelnames) === Symbol || (labelnames = Symbol.(labelnames))
+    totags(label) = labels2tags(labelnames, label)
+    # rebuild every track, swapping the label record for a `:tags` record
+    converted = Track[]
+    for track in values(run.tracks)
+        states = OrderedDict(
+            key === labelrecord ? (:tags => totags.(vals)) : (key => vals)
+            for (key, vals) in pairs(track.states))
+        push!(converted, Track(track.id, track.attributes, track.timestamps, states))
+    end
+    return Run(run.id, run.attributes, converted)
+end
+
+function labels2tags(labels::Vector{Symbol}, label)
+    # `label` is either a single label or a vector of labels (possibly empty)
+    active = label isa Vector ? Symbol[Symbol(l) for l in label] : [Symbol(label)]
+    return BehaviorTags(Symbol.(labels), active)
+end
+
 function asrun(runid::Datasets.RunID, timeseries::LarvaBase.Larvae,
         attributes::Datasets.Attributes=Datasets.ConcreteAttributes())
     tracks = Track[]
@@ -534,6 +575,11 @@ function load!(file::JSONLabels)
     return file
 end
 
+function load!(file::MaggotUBA)
+    file.run = read_larva_dataset_hdf5(file.source) # reads the file at `source` anew
+    return file
+end
+
 """
     unload!(loadedfile)
 
@@ -589,7 +635,7 @@ function labelledfiles(repository::String=".", chunks::Bool=false;
             catch
                 continue
             end
-            if file′ isa Trxmat || file′ isa JSONLabels
+            if file′ isa Trxmat || file′ isa JSONLabels || file′ isa MaggotUBA
                 push!(files′, file′)
                 if file′ isa JSONLabels
                     for dep in getdependencies(file′)
@@ -636,6 +682,7 @@ function from_mwt(file::JSONLabels)
     end
     from_mwt(file.dependencies[1])
 end
+from_mwt(::MaggotUBA) = false # always false: the track segments are resampled, although originally they were not
 
 # asemptytracks(run_or_timeseries) = [Track(trackid, LarvaBase.times(trackdata))
 #                                     for (trackid, trackdata) in pairs(run_or_timeseries)]
diff --git a/src/MaggotUBA.jl b/src/MaggotUBA.jl
new file mode 100644
index 0000000000000000000000000000000000000000..71f1601411dd9db2e7185b8309855105074e8f56
--- /dev/null
+++ b/src/MaggotUBA.jl
@@ -0,0 +1,131 @@
+"""
+Read support for larva_dataset_*.hdf5 interim files from MaggotUBA.
+
+This module exists for testing purposes and the file format should not be considered as
+fully supported. The module provides the core logic for `Formats.MaggotUBA`.
+
+To visualize track segments in LarvaTagger, first extract segments originating from a
+particular tracking data file using `extract_track_segments`, passing `false` as the
+sixth positional argument (`allow_overlap`).
+"""
+module MaggotUBA
+
+using ..LarvaBase
+using ..Datasets
+using HDF5
+using OrderedCollections
+
+export read_larva_dataset_hdf5, extract_track_segments
+
+"""
+    read_larva_dataset_hdf5(path)
+
+Read a *larva_dataset* HDF5 file and return its samples as a `Run` with one `Track` per
+sample. Each track holds a `:labels` record that repeats the sample's `behavior`
+attribute at every time step, and a `:spine` record built from columns 9 to the last of
+the sample data; column 1 is passed to `Track` as the time vector.
+
+The run id is the concatenation of the third to fifth underscore-separated parts of the
+file name. An `ArgumentError` is thrown if a sample's behavior is not one of the labels
+listed in the file.
+"""
+function read_larva_dataset_hdf5(path)
+    h5open(path, "r") do h5
+        labels = read(h5, "labels")
+        samples = h5["samples"]
+        nsamples = read_attribute(samples, "n_samples")
+        tracks = Track[]
+        for sampleid in 0:nsamples-1
+            sample = samples["sample_$sampleid"]
+            label = read_attribute(sample, "behavior")
+            # file content is external input; `@assert` may be disabled, so validate
+            # with an explicit exception instead
+            label in labels || throw(ArgumentError(
+                "unknown behavior \"$label\" in sample_$sampleid of file: $path"))
+            data = read(sample)
+            # reverse the dimension order; from here on, each row is one time step
+            data = permutedims(data, reverse(1:ndims(data)))
+            t = data[:,1]
+            spines = data[:,9:end]
+            spines = [convert(Spine, Path(row)) for row in eachrow(spines)]
+            track = Track(sampleid, t, Dict(
+                :labels => fill(label, length(t)),
+                :spine => spines,
+            ))
+            push!(tracks, track)
+        end
+        parts = split(basename(path), '_')
+        runid = join(parts[3:5])
+        attr = Dict(
+            :metadata => Dict(:id => runid, :filename => basename(path)),
+            :labels => labels,
+        )
+        Run(runid, attr, tracks)
+    end
+end
+
+"""
+    extract_track_segments(outputpath, inputpath, originalfile, tmin=0, tmax=Inf,
+        allow_overlap=true)
+
+Copy into a new file `outputpath` the samples of *larva_dataset* file `inputpath` whose
+`path` attribute equals `originalfile` and whose `reference_time` attribute lies in
+`[tmin, tmax]`. The samples are copied in order of increasing reference time and
+renumbered from 0.
+
+If `allow_overlap` is `false`, a sample is skipped if it starts before the last time
+point of the previously copied sample (or before time 0 for the first copied sample).
+
+Return a `Dict` that maps each label to the number of copied samples with that behavior.
+"""
+function extract_track_segments(outputpath, inputpath, originalfile, tmin=0, tmax=Inf,
+    allow_overlap=true)
+    h5open(inputpath, "r") do h5
+        labels = read(h5, "labels")
+        labelcounts = Dict(label=>0 for label in labels)
+        samples = h5["samples"]
+        nsamples = read_attribute(samples, "n_samples")
+        # select the samples that originate from `originalfile` within the time window
+        segments = Tuple{Int, Float64}[]
+        for sampleid in 0:nsamples-1
+            sample = samples["sample_$sampleid"]
+            file = read_attribute(sample, "path")
+            if file == originalfile
+                reftime = read_attribute(sample, "reference_time")
+                if tmin <= reftime <= tmax
+                    push!(segments, (sampleid, reftime))
+                end
+            end
+        end
+        sampleids = [id for (id, _) in sort(segments; by=last)]
+        # copy the selected samples, renumbered, into the output file
+        h5open(outputpath, "w") do h5′
+            g = create_group(h5′, "samples")
+            t = 0.0 # last time point of the previously copied sample
+            extractedsamples = 0
+            for sampleid in sampleids
+                sample = samples["sample_$sampleid"]
+                if !allow_overlap
+                    data = read(sample)
+                    data = permutedims(data, reverse(1:ndims(data)))
+                    t <= data[1,1] || continue
+                    t = data[end,1]
+                end
+                name = "sample_$extractedsamples"
+                @info "Copying sample_$sampleid as" name
+                copy_object(sample, g, name)
+                labelcounts[read_attribute(g[name], "behavior")] += 1
+                extractedsamples += 1
+            end
+            for attr in ("len_traj", "len_pred", "frame_interval")
+                attributes(g)[attr] = read_attribute(samples, attr)
+            end
+            attributes(g)["n_samples"] = extractedsamples
+            copy_object(h5, "labels", h5′, "labels")
+            h5′["label_counts"] = [labelcounts[label] for label in labels]
+        end
+        return labelcounts
+    end
+end
+
+end
diff --git a/src/PlanarLarvae.jl b/src/PlanarLarvae.jl
index 6051579869b1939788928e9d6ecda72c83012854..bf1edf305183fd67bb4f9c52d07ef79c5add0a64 100644
--- a/src/PlanarLarvae.jl
+++ b/src/PlanarLarvae.jl
@@ -5,6 +5,7 @@ include("Chore.jl")
 include("Trxmat.jl")
 include("Datasets.jl")
 include("FIMTrack.jl")
+include("MaggotUBA.jl")
 include("Formats.jl")
 include("Features.jl")
 include("MWT.jl")
diff --git a/src/records.jl b/src/records.jl
index 961507e06214bf3b5a9992fbd2f8b8b7048538b7..e99f56c6ee5171a0cdc2b81c9c1441d0cd96d90e 100644
--- a/src/records.jl
+++ b/src/records.jl
@@ -1,7 +1,7 @@
 """
 Larva identification number as found in *.outline*, *.spine* and *trx.mat* files.
 """
-const LarvaID = UInt16
+const LarvaID = UInt32
 
 """
 Timestamp (alias for `Float64`).
diff --git a/test/runtests.jl b/test/runtests.jl
index c5404c77383331aa0acca4980248322cb218fd74..7202e3dc581773a0d58d934a02c5ce9f4a2c9efc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -377,7 +377,7 @@ if all_tests || "Datasets" in ARGS
     show_str = replace(show_str, "Vector{T} where T" => "Vector")
     @test show_str * "\n" == """Dataset with 1 run:
 Run("20150701_105504") with 1 track
-Track(0x0200, OrderedDict{Symbol, Any}(), [66.675, 66.755, 66.856, 66.949, 67.039, 67.115, 67.196], OrderedDict{Symbol, Vector}(:labels => ["crawl", "bend", "back", "stop", "undecided", "hunch", "roll"]))
+Track(0x00000200, OrderedDict{Symbol, Any}(), [66.675, 66.755, 66.856, 66.949, 67.039, 67.115, 67.196], OrderedDict{Symbol, Vector}(:labels => ["crawl", "bend", "back", "stop", "undecided", "hunch", "roll"]))
 with metadata: OrderedDict{Symbol, Any} with 3 entries:
   :genotype => "FCF_attP2_1500062"
   :effector => "UAS_Chrimson_Venus_X_0070"