From 0c98393ade6d3f16ffb56b9ef3a73c849708108e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Laurent?= <francois.laurent@posteo.net>
Date: Tue, 12 Sep 2023 10:52:21 +0200
Subject: [PATCH] check_larva_dataset_hdf5
---
Manifest.toml | 6 +++---
src/LarvaDatasets.jl | 28 +++++++++++++++++++++++++++-
2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/Manifest.toml b/Manifest.toml
index 0c46941..84c6a4d 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -312,11 +312,11 @@ version = "1.9.2"
[[deps.PlanarLarvae]]
deps = ["DelimitedFiles", "HDF5", "JSON3", "LinearAlgebra", "MAT", "Meshes", "OrderedCollections", "Random", "SHA", "StaticArrays", "Statistics", "StatsBase", "StructTypes"]
-git-tree-sha1 = "c3397f0c8a6ce76acdbe0517a060e39b90a30db8"
-repo-rev = "dev"
+git-tree-sha1 = "25dede7c9e34786f3c9a576fc2da3c3448c12d80"
+repo-rev = "main"
repo-url = "https://gitlab.pasteur.fr/nyx/planarlarvae.jl"
uuid = "c2615984-ef14-4d40-b148-916c85b43307"
-version = "0.13.0"
+version = "0.14.0"
[[deps.PrecompileTools]]
deps = ["Preferences"]
diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl
index a2f1249..528f877 100644
--- a/src/LarvaDatasets.jl
+++ b/src/LarvaDatasets.jl
@@ -26,7 +26,7 @@ using Statistics
using Memoization
using OrderedCollections
-export write_larva_dataset_hdf5, first_stimulus, labelcounts
+export write_larva_dataset_hdf5, first_stimulus, labelcounts, check_larva_dataset_hdf5
"""
labelcounts(files)
@@ -848,4 +848,30 @@ end
runid(file) = splitpath(file.source)[end-1]
+"""
+    check_larva_dataset_hdf5(path; print=true)
+
+Read the total label counts stored in the HDF5 file at `path` and, for each label, pick
+the first sample carrying it (fields `path`, `larva_number`, `reference_time`). Return
+`(labelcounts, examples)`, both keyed by label `Symbol`; `print=false` disables logging.
+"""
+function check_larva_dataset_hdf5(path; print=true)
+    h5open(path, "r") do h5
+        labels = read(h5, "labels")
+        counts = Dict(Symbol(l) => n for (l, n) in zip(labels, read(h5, "label_counts")))
+        print && @info "Labels:" pairs(counts)...
+        examples = Dict{Symbol, NamedTuple{(:path, :larva_number, :reference_time), Tuple{String, Int, Float64}}}()
+        g = h5["samples"]
+        for sampleid in 1:read(attributes(g), "n_samples")
+            h = g["sample_$sampleid"]
+            label = Symbol(read(attributes(h), "behavior"))
+            haskey(examples, label) && continue # keep only the first sample of each label
+            examples[label] = (path=read(attributes(h), "path"), larva_number=read(attributes(h), "larva_number"), reference_time=read(attributes(h), "reference_time"))
+            print && @info "$(label) example" examples[label]... # honour `print` here too
+            length(examples) == length(labels) && break # every label has an example
+        end
+        return counts, examples
+    end
+end
+
end
--
GitLab