diff --git a/Manifest.toml b/Manifest.toml
index 0c46941449e49308b431cdc1f53e9756808dd416..84c6a4d2fb0bf89d829568da8b8273fbc65f14d6 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -312,11 +312,11 @@ version = "1.9.2"
 
 [[deps.PlanarLarvae]]
 deps = ["DelimitedFiles", "HDF5", "JSON3", "LinearAlgebra", "MAT", "Meshes", "OrderedCollections", "Random", "SHA", "StaticArrays", "Statistics", "StatsBase", "StructTypes"]
-git-tree-sha1 = "c3397f0c8a6ce76acdbe0517a060e39b90a30db8"
-repo-rev = "dev"
+git-tree-sha1 = "25dede7c9e34786f3c9a576fc2da3c3448c12d80"
+repo-rev = "main"
 repo-url = "https://gitlab.pasteur.fr/nyx/planarlarvae.jl"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
-version = "0.13.0"
+version = "0.14.0"
 
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl
index a2f1249d32a8e86ecf5de94200eed5ef59d7ba5e..528f877b1c932dd1c10527df62351178058f24b1 100644
--- a/src/LarvaDatasets.jl
+++ b/src/LarvaDatasets.jl
@@ -26,7 +26,7 @@ using Statistics
 using Memoization
 using OrderedCollections
 
-export write_larva_dataset_hdf5, first_stimulus, labelcounts
+export write_larva_dataset_hdf5, first_stimulus, labelcounts, check_larva_dataset_hdf5
 
 """
     labelcounts(files)
@@ -848,4 +848,42 @@ end
 
 runid(file) = splitpath(file.source)[end-1]
 
+"""
+    check_larva_dataset_hdf5(path; print=true)
+
+Read the total label counts from the HDF5 larva dataset file at `path` and pick, for each
+label, the first sample found with that label as an example.
+
+Return `(labelcounts, examples)`, where `labelcounts` maps each label (as a `Symbol`) to
+its count, and `examples` maps each label found among the samples to a named tuple with
+fields `path`, `larva_number` and `reference_time`.
+
+If `print` is `true`, the label counts and the examples are logged with `@info`.
+"""
+function check_larva_dataset_hdf5(path; print=true)
+    h5open(path, "r") do h5
+        labels = read(h5, "labels")
+        # named `counts` so as not to shadow the exported `labelcounts` function
+        counts = Dict(Symbol(label) => count
+                      for (label, count) in zip(labels, read(h5, "label_counts")))
+        print && @info "Labels:" pairs(counts)...
+        examples = Dict{Symbol, NamedTuple{(:path, :larva_number, :reference_time), Tuple{String, Int, Float64}}}()
+        g = h5["samples"]
+        for sampleid in 1:read(attributes(g), "n_samples")
+            attrs = attributes(g["sample_$sampleid"])
+            label = Symbol(read(attrs, "behavior"))
+            # keep the first sample encountered for each label only
+            haskey(examples, label) && continue
+            examples[label] = (path=read(attrs, "path"),
+                               larva_number=read(attrs, "larva_number"),
+                               reference_time=read(attrs, "reference_time"))
+            # honor `print` here too, so that `print=false` silences all logging
+            print && @info "$(label) example" examples[label]...
+            # stop as soon as every label has an example
+            length(examples) == length(labels) && break
+        end
+        return counts, examples
+    end
+end
+
 end