diff --git a/Artifacts.toml b/Artifacts.toml index 7b55130b7fbb5f553a1e90812056b21ae0e55992..bcd215e21543d099731a5a365558f9c2544c5c5f 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -51,5 +51,5 @@ git-tree-sha1 = "1dec5e1c33044d972466d03ca5b8816b9861eb5f" lazy = true [[sample_training_dataset.download]] - url = "https://dl.pasteur.fr/fop/Wa2niN3o/sample_training_dataset.tgz" + url = "https://dl.pasteur.fr/fop/DKgeHrAl/sample_training_dataset.tgz" sha256 = "5f1fc870185252a6c4d79ce113c6d98e30107610ee11029642e2e72b7d62c1a0" diff --git a/Project.toml b/Project.toml index ca5795b8266550c0da2ecf8b8ad7b94cfd6e39c7..2d1b72ce8e89e8dff6973700501cb4a4ac3910b7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PlanarLarvae" uuid = "c2615984-ef14-4d40-b148-916c85b43307" authors = ["François Laurent", "Institut Pasteur"] -version = "0.14" +version = "0.15" [deps] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" diff --git a/src/Datasets.jl b/src/Datasets.jl index c61033579bcd65fbfdc7eefe63cc3c51c98c2947..f60ba059c93f5f81144b865b1532232dd17cd1e9 100644 --- a/src/Datasets.jl +++ b/src/Datasets.jl @@ -11,7 +11,7 @@ export Dataset, Run, Track, extract_metadata_from_filepath, sort_metadata, encodelabels, encodelabels!, decodelabels, decodelabels!, mergelabels!, appendlabel!, shareddependencies #, pushdependency!, getdependencies, setdefaultlabel! -const Attributes = AbstractDict{Symbol, Any} +const Attributes = AbstractDict{Symbol} const ConcreteAttributes = Dict{Symbol, Any} # ConcreteAttributes() as default argument value const Dict′ = OrderedDict{Symbol, Any} diff --git a/src/Formats.jl b/src/Formats.jl index 5bf1da2d9b65184e00a0c2d2911738ff0c36c53c..0993c935d8238023b9d87497358934aaf86f6b26 100644 --- a/src/Formats.jl +++ b/src/Formats.jl @@ -12,6 +12,7 @@ using ..LarvaBase: LarvaBase, Spine, Outline, BehaviorTags, derivedtype using ..Chore: read_chore_files, parse_filename, find_chore_files using ..Trxmat: read_trxmat, checktrxmat using ..FIMTrack: read_fimtrack +using ..MaggotUBA: read_larva_dataset_hdf5 using ..Datasets using OrderedCollections: OrderedDict @@ -57,6 +58,12 @@ mutable struct JSONLabels <: PreloadedFile run::Run dependencies::Vector{PreloadedFile} end +mutable struct MaggotUBA <: PreloadedFile + source::String + capabilities + timeseries::LarvaBase.Larvae + run::Run +end larvafile(T, path, capabilities, args...) = T(path, capabilities, @@ -73,6 +80,7 @@ FIMTrack(path::String; framerate=1, pixelsize=nothing, overrides=nothing ) = larvafile(FIMTrack, path, spine_outline, framerate, pixelsize, isnothing(overrides) ? Dict{Symbol, Any}() : overrides) JSONLabels(path::String) = larvafile(JSONLabels, path, spine_outline_tags, PreloadedFile[]) +MaggotUBA(path::String) = larvafile(MaggotUBA, path, (:spine=>Spine, :tags=>BehaviorTags)) """ guessfileformat(filepath; fail=false, shallow=false) @@ -97,6 +105,8 @@ function guessfileformat(path::String; fail::Bool=false, shallow::Bool=false) head(r"^\{\s*\"metadata\":\s*{\s*\"id\":", path, ',') && return JSONLabels elseif ext in fimtrack_ext head(r"^,larva\([0-9]$", path, 8) && return FIMTrack + elseif ext in maggotuba_ext + startswith(basename(path), "larva_dataset_") && return MaggotUBA end fail && throw("Cannot determine format for file: $path") end @@ -105,6 +115,7 @@ const chore_ext = (".outline",".spine") const trxmat_ext = (".mat",) const fimtrack_ext = (".csv",) const labels_ext = (".json",".label",".labels",".nyxlabel") +const maggotuba_ext = (".hdf5",) head(pattern::Regex, s::IOStream, n) = head(s, n) do s !isnothing(match(pattern, s)) @@ -210,7 +221,7 @@ function getlabels(file::Trxmat; fail=false) end return Dict(:names=>file.labels) end -function getlabels(file::JSONLabels; fail=false) +function getlabels(file::Union{JSONLabels, MaggotUBA}; fail=false) isempty(file.run) && load!(file) #labels = file.run.attributes[:labels] _, recordname = Datasets.getlabels(file.run) @@ -243,6 +254,7 @@ getnativerepr(file::Trxmat) = gettimeseries(file) getnativerepr(file::FIMTrack) = getrun(file) # note: labels files with no data dependencies may be broken getnativerepr(file::JSONLabels) = isempty(file.dependencies) ? getrun(file) : getnativerepr(file.dependencies[1]) +getnativerepr(file::MaggotUBA) = getrun(file) """ gettimeseries(preloadedfile) @@ -340,7 +352,8 @@ function astimeseries(track::Track) return timeseries end -function astimeseries(run::Run) +function astimeseries(run::Run; labels2tags=false) + labels2tags && return astimeseries(Formats.labels2tags(run)) track = first(values(run.tracks)) firststate = asnamedtuple(track.states, 1) T = typeof(firststate) @@ -421,6 +434,34 @@ function appendtags(timeseries, run) return newtimeseries end +""" + labels2tags(run) + +Make a copy with labels converted into behavior tags. +""" +function labels2tags(run::Run) + labels, attr = Datasets.getlabels(run) + if labels isa AbstractDict + labels = labels[:names] + end + if eltype(labels) !== Symbol + labels = Symbol.(labels) + end + labels2tags′(label) = labels2tags(labels, label) + # + tracks′= Track[] + for track in values(run.tracks) + records′= OrderedDict(rec===attr ? :tags=>labels2tags′.(vals) : rec=>vals + for (rec, vals) in pairs(track.states)) + track′= Track(track.id, track.attributes, track.timestamps, records′) + push!(tracks′, track′) + end + Run(run.id, run.attributes, tracks′) +end + +labels2tags(labels::Vector{Symbol}, label) = BehaviorTags(Symbol.(labels), + label isa Vector ? (isempty(label) ? Symbol[] : Symbol.(label)) : [Symbol(label)]) + function asrun(runid::Datasets.RunID, timeseries::LarvaBase.Larvae, attributes::Datasets.Attributes=Datasets.ConcreteAttributes()) tracks = Track[] @@ -534,6 +575,11 @@ function load!(file::JSONLabels) return file end +function load!(file::MaggotUBA) + file.run = read_larva_dataset_hdf5(file.source) + return file +end + """ unload!(loadedfile) @@ -589,7 +635,7 @@ function labelledfiles(repository::String=".", chunks::Bool=false; catch continue end - if file′ isa Trxmat || file′ isa JSONLabels + if file′ isa Trxmat || file′ isa JSONLabels || file′ isa MaggotUBA push!(files′, file′) if file′ isa JSONLabels for dep in getdependencies(file′) @@ -636,6 +682,7 @@ function from_mwt(file::JSONLabels) end from_mwt(file.dependencies[1]) end +from_mwt(::MaggotUBA) = false # the track segments are resampled, although originally they were not # asemptytracks(run_or_timeseries) = [Track(trackid, LarvaBase.times(trackdata)) # for (trackid, trackdata) in pairs(run_or_timeseries)] diff --git a/src/MaggotUBA.jl b/src/MaggotUBA.jl new file mode 100644 index 0000000000000000000000000000000000000000..71f1601411dd9db2e7185b8309855105074e8f56 --- /dev/null +++ b/src/MaggotUBA.jl @@ -0,0 +1,100 @@ +""" +Read support for larva_dataset_*.hdf5 interim files from MaggotUBA. + +This module exists for testing purposes and the file format should not be considered as +fully supported. The module provides the core logic for `Formats.MaggotUBA`. + +To visualize track segments in LarvaTagger, first extract segments originating from a +particular tracking data file using `extract_track_segments` with `allow_overlap=false`. +""" +module MaggotUBA + +using ..LarvaBase +using ..Datasets +using HDF5 +using OrderedCollections + +export read_larva_dataset_hdf5, extract_track_segments + +function read_larva_dataset_hdf5(path) + h5open(path, "r") do h5 + labels = read(h5, "labels") + samples = h5["samples"] + nsamples = read_attribute(samples, "n_samples") + tracks = Track[] + for sampleid in 0:nsamples-1 + sample = samples["sample_$sampleid"] + label = read_attribute(sample, "behavior") + @assert label in labels + data = read(sample) + data = permutedims(data, reverse(1:ndims(data))) + t = data[:,1] + spines = data[:,9:end] + spines = [convert(Spine, Path(row)) for row in eachrow(spines)] + track = Track(sampleid, t, Dict( + :labels => fill(label, length(t)), + :spine => spines, + )) + push!(tracks, track) + end + parts = split(basename(path), '_') + runid = join(parts[3:5]) + attr = Dict( + :metadata => Dict(:id => runid, :filename => basename(path)), + :labels => labels, + ) + Run(runid, attr, tracks) + end +end + +function extract_track_segments(outputpath, inputpath, originalfile, tmin=0, tmax=Inf, + allow_overlap=true) + h5open(inputpath, "r") do h5 + labels = read(h5, "labels") + labelcounts = Dict(label=>0 for label in labels) + samples = h5["samples"] + nsamples = read_attribute(samples, "n_samples") + # + segments = Tuple{Int, Float64}[] + for sampleid in 0:nsamples-1 + sample = samples["sample_$sampleid"] + file = read_attribute(sample, "path") + if file == originalfile + reftime = read_attribute(sample, "reference_time") + if tmin <= reftime <= tmax + push!(segments, (sampleid, reftime)) + end + end + end + sampleids = [id for (id, _) in sort(segments; by=last)] + # + h5open(outputpath, "w") do h5′ + g = create_group(h5′, "samples") + t = 0 + extractedsamples = 0 + for sampleid in sampleids + sample = samples["sample_$sampleid"] + if !allow_overlap + data = read(sample) + data = permutedims(data, reverse(1:ndims(data))) + t <= data[1,1] || continue + t = data[end,1] + end + name = "sample_$extractedsamples" + @info "Copying sample_$sampleid as" name + copy_object(sample, g, name) + labelcounts[read_attribute(g[name], "behavior")] += 1 + extractedsamples += 1 + end + for attr in ("len_traj", "len_pred", "frame_interval") + attributes(g)[attr] = read_attribute(samples, attr) + end + attributes(g)["n_samples"] = extractedsamples + copy_object(h5, "labels", h5′, "labels") + h5′["label_counts"] = [labelcounts[label] for label in labels] + end + return labelcounts + end +end + +end diff --git a/src/PlanarLarvae.jl b/src/PlanarLarvae.jl index 6051579869b1939788928e9d6ecda72c83012854..bf1edf305183fd67bb4f9c52d07ef79c5add0a64 100644 --- a/src/PlanarLarvae.jl +++ b/src/PlanarLarvae.jl @@ -5,6 +5,7 @@ include("Chore.jl") include("Trxmat.jl") include("Datasets.jl") include("FIMTrack.jl") +include("MaggotUBA.jl") include("Formats.jl") include("Features.jl") include("MWT.jl") diff --git a/src/records.jl b/src/records.jl index 961507e06214bf3b5a9992fbd2f8b8b7048538b7..e99f56c6ee5171a0cdc2b81c9c1441d0cd96d90e 100644 --- a/src/records.jl +++ b/src/records.jl @@ -1,7 +1,7 @@ """ Larva identification number as found in *.outline*, *.spine* and *trx.mat* files. """ -const LarvaID = UInt16 +const LarvaID = UInt32 """ Timestamp (alias for `Float64`). diff --git a/test/runtests.jl b/test/runtests.jl index c5404c77383331aa0acca4980248322cb218fd74..7202e3dc581773a0d58d934a02c5ce9f4a2c9efc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -377,7 +377,7 @@ if all_tests || "Datasets" in ARGS show_str = replace(show_str, "Vector{T} where T" => "Vector") @test show_str * "\n" == """Dataset with 1 run: Run("20150701_105504") with 1 track -Track(0x0200, OrderedDict{Symbol, Any}(), [66.675, 66.755, 66.856, 66.949, 67.039, 67.115, 67.196], OrderedDict{Symbol, Vector}(:labels => ["crawl", "bend", "back", "stop", "undecided", "hunch", "roll"])) +Track(0x00000200, OrderedDict{Symbol, Any}(), [66.675, 66.755, 66.856, 66.949, 67.039, 67.115, 67.196], OrderedDict{Symbol, Vector}(:labels => ["crawl", "bend", "back", "stop", "undecided", "hunch", "roll"])) with metadata: OrderedDict{Symbol, Any} with 3 entries: :genotype => "FCF_attP2_1500062" :effector => "UAS_Chrimson_Venus_X_0070"