From 0c98393ade6d3f16ffb56b9ef3a73c849708108e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Laurent?= <francois.laurent@posteo.net>
Date: Tue, 12 Sep 2023 10:52:21 +0200
Subject: [PATCH] check_larva_dataset_hdf5

---
 Manifest.toml        |  6 +++---
 src/LarvaDatasets.jl | 28 +++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 0c46941..84c6a4d 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -312,11 +312,11 @@ version = "1.9.2"
 
 [[deps.PlanarLarvae]]
 deps = ["DelimitedFiles", "HDF5", "JSON3", "LinearAlgebra", "MAT", "Meshes", "OrderedCollections", "Random", "SHA", "StaticArrays", "Statistics", "StatsBase", "StructTypes"]
-git-tree-sha1 = "c3397f0c8a6ce76acdbe0517a060e39b90a30db8"
-repo-rev = "dev"
+git-tree-sha1 = "25dede7c9e34786f3c9a576fc2da3c3448c12d80"
+repo-rev = "main"
 repo-url = "https://gitlab.pasteur.fr/nyx/planarlarvae.jl"
 uuid = "c2615984-ef14-4d40-b148-916c85b43307"
-version = "0.13.0"
+version = "0.14.0"
 
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl
index a2f1249..528f877 100644
--- a/src/LarvaDatasets.jl
+++ b/src/LarvaDatasets.jl
@@ -26,7 +26,7 @@ using Statistics
 using Memoization
 using OrderedCollections
 
-export write_larva_dataset_hdf5, first_stimulus, labelcounts
+export write_larva_dataset_hdf5, first_stimulus, labelcounts, check_larva_dataset_hdf5
 
 """
     labelcounts(files)
@@ -848,4 +848,30 @@ end
 
 runid(file) = splitpath(file.source)[end-1]
 
+"""
+    check_larva_dataset_hdf5(path; print=true)
+
+Return `(labelcounts, examples)` read from the HDF5 dataset at `path`: a `Dict` of counts per
+label and a `Dict` of the first sample found per label, as `(path, larva_number, reference_time)`.
+With `print=false`, neither is logged with `@info`.
+"""
+function check_larva_dataset_hdf5(path; print=true)
+    h5open(path, "r") do h5
+        labels = read(h5, "labels")
+        labelcounts = Dict(Symbol(l) => n for (l, n) in zip(labels, read(h5, "label_counts")))
+        print && @info "Labels:" pairs(labelcounts)...
+        examples = Dict{Symbol, NamedTuple{(:path, :larva_number, :reference_time), Tuple{String, Int, Float64}}}()
+        g = h5["samples"]
+        for sampleid in 1:read(attributes(g), "n_samples")
+            h = g["sample_$sampleid"]
+            label = Symbol(read(attributes(h), "behavior"))
+            haskey(examples, label) && continue  # keep only the first sample per label
+            examples[label] = (path=read(attributes(h), "path"), larva_number=read(attributes(h), "larva_number"), reference_time=read(attributes(h), "reference_time"))
+            print && @info "$(label) example" examples[label]...
+            length(examples) == length(labels) && break  # as many examples as labels: stop
+        end
+        return labelcounts, examples
+    end
+end
+
 end
-- 
GitLab