diff --git a/src/LarvaDatasets.jl b/src/LarvaDatasets.jl
index 70d9183f397933c6fab6cc255ed244a95a1efc34..7580a90cc75270e79e07143b50d64bc4444f981a 100644
--- a/src/LarvaDatasets.jl
+++ b/src/LarvaDatasets.jl
@@ -23,6 +23,7 @@ using HDF5
 using Dates
 using Statistics
 using Memoization
+using OrderedCollections
 
 export write_larva_dataset_hdf5, first_stimulus, labelcounts
 
@@ -591,6 +592,9 @@ function write_larva_dataset_hdf5(output_dir::String,
         isnothing(sample_size) || @error "Argument sample_size not supported for the specified balancing strategy"
         sample_sizes, total_sample_size = thresholdedcounts(counts)
     end
+    # ensure label order is preserved
+    sample_sizes = OrderedDict((label => sample_sizes[label]) for label in labels if label in keys(sample_sizes))
+    #
     @info "Sample sizes (observed, selected):" [Symbol(label) => (get(counts, label, 0), get(sample_sizes, label, 0)) for label in labels]...
     date = Dates.format(Dates.now(), "yyyy_mm_dd")
     output_file = joinpath(output_dir, "larva_dataset_$(date)_$(window_length)_$(window_length)_$(total_sample_size).hdf5")
diff --git a/src/taggingbackends/data/dataset.py b/src/taggingbackends/data/dataset.py
index 8504f2755471462f07d83e12a4e6f97ffb3549f3..0f8b9661fe63f8667b1f521df80c444a98248736 100644
--- a/src/taggingbackends/data/dataset.py
+++ b/src/taggingbackends/data/dataset.py
@@ -37,6 +37,10 @@ class LarvaDataset:
         return self._full_set
     """
     *list* of *bytes*: Set of distinct labels.
+
+    If the hdf5 file does not feature a top-level `labels` element that lists
+    the labels, the fallback labels and their order are:
+    RUN, BEND, STOP, HUNCH, BACK, ROLL.
     """
     @property
     def labels(self):