Commit 34c82e3f authored by François LAURENT
Browse files

Formats.labelledfiles + more checks in Formats.guessfileformat

parent 169541c3
Pipeline #84695 failed with stage
in 4 minutes and 29 seconds
......@@ -5,6 +5,7 @@ version = "0.5.0"
[deps]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
Meshes = "eacbb407-ea5a-433e-ab97-5258b1ca43fa"
......
......@@ -62,7 +62,7 @@ using PlanarLarvae.FIMTrack
sample_file = joinpath(artifact"sample_fimtrack_tables", "collision_sample_table.csv")
tracks = read_fimtrack((:spine=>Spine, :outline=>Outline), sample_file; fps=30)
tracks = read_fimtrack((:spine=>Spine, :outline=>Outline), sample_file; framerate=30)
example_track = tracks[1]
example_state = example_track[3.6]
......
......@@ -14,9 +14,11 @@ using ..Trxmat: read_trxmat
using ..FIMTrack: read_fimtrack
using ..Datasets
using OrderedCollections: OrderedDict
using HDF5
export guessfileformat, preload, load, drop_record!, gettimeseries, astimeseries,
getrun, asrun, getmetadata, getlabels, getdependencies, appendtags, TIME_PRECISION
getrun, asrun, getmetadata, getlabels, getdependencies, appendtags, TIME_PRECISION,
labelledfiles
const TIME_PRECISION = 0.001
......@@ -60,8 +62,8 @@ larvafile(T, path, capabilities, args...) = T(path,
Run("NA"),
args...)
# Record name => record type pairs, passed as the `capabilities` argument of `larvafile`.
# Each tuple is declared once, as a `const`: a global cannot be bound as a plain variable
# and then re-declared `const`, and `const` globals are concretely typed.
const spine_outline = (:spine=>Spine, :outline=>Outline)
const spine_outline_tags = (:spine=>Spine, :outline=>Outline, :tags=>BehaviorTags)

# Path-only constructors; the remaining fields are filled in by `larvafile`.
Chore(path::String) = larvafile(Chore, path, spine_outline)
# `tags` is forwarded to `larvafile` as an extra positional argument.
Trxmat(path::String; tags=Symbol[]) = larvafile(Trxmat, path, spine_outline_tags, tags)
......@@ -69,7 +71,7 @@ FIMTrack(path::String; framerate=1) = larvafile(FIMTrack, path, spine_outline, f
# Path-only constructor for JSON label files; an empty `PreloadedFile` vector is passed to
# `larvafile` as the extra argument.
JSONLabels(path::String) = larvafile(JSONLabels, path, spine_outline_tags, PreloadedFile[])
"""
guessfileformat(filepath; fail=false)
guessfileformat(filepath; fail=false, shallow=false)
Read the first bytes of a file and guess its format.
......@@ -77,21 +79,44 @@ Formats are returned as `PreloadedFile` concrete types.
If the format cannot be guessed and `fail` is `false`, `nothing` is returned; else an error
is thrown.
`shallow=true` allows skipping a time-consuming check for the presence of the `trx` record
in *trx.mat* files.
"""
function guessfileformat(path::String; fail::Bool=false, shallow::Bool=false)
    # The file extension selects which signature is looked for at the beginning of the
    # file; a file whose extension is not listed is never opened.
    _, ext = splitext(path)
    if ext in trxmat_ext
        # The `trx` record lookup requires opening the file with HDF5 and is skipped on
        # `shallow=true`.
        if head(==("MATLAB 7.3 MAT-file"), path, 19) && (shallow || checktrxmat(path))
            return Trxmat
        end
    elseif ext in chore_ext
        head(r"^[0-9]{8}_[0-9]{6}\s[0-9]$", path, 17) && return Chore
    elseif ext in labels_ext
        # The text up to the first comma is expected to open the `metadata` object with
        # its `id` field.
        head(r"^\{\s*\"metadata\":\s*{\s*\"id\":", path, ',') && return JSONLabels
    elseif ext in fimtrack_ext
        head(r"^,larva\([0-9]$", path, 8) && return FIMTrack
    end
    fail && throw("cannot determine file format")
    # Explicit `nothing`: the value of `fail && throw(...)` would otherwise be returned,
    # that is `false`.
    return nothing
end
# File extensions tested by `guessfileformat` for each file format. Extensions are
# compared with the second output of `splitext`, hence the leading dot.
const chore_ext = (".outline",".spine")
const trxmat_ext = (".mat",)
const fimtrack_ext = (".csv",)
const labels_ext = (".json",".label",".labels")
# `head` applies a predicate to the beginning of a file or stream.
#
# The amount of text to read is given either as a number of bytes (`Int`) or as a
# delimiter character (`Char`), in which case the text preceding the first occurrence of
# the delimiter is read.

# Regex predicate: true if the pattern matches the text read from the stream.
function head(pattern::Regex, s::IOStream, n)
    return head(str -> occursin(pattern, str), s, n)
end

# Read `n` bytes and pass them to `f` as a `String`.
function head(f::Function, s::IOStream, n::Int)
    bytes = read(s, n)
    return f(String(bytes))
end

# Read up to (and excluding) the first occurrence of `c` and pass the text to `f`.
function head(f::Function, s::IOStream, c::Char)
    prefix = readuntil(s, c)
    return f(prefix)
end

# Path variant: the file is opened for reading and closed once the check is done.
function head(f, path::String, n)
    return open(io -> head(f, io, n), path)
end
# Open the file at `path` with `h5open` and tell whether it has a "trx" entry at the top
# level.
checktrxmat(path) = h5open(file -> haskey(file, "trx"), path)
"""
preload(filepath)
preload(filetype, filepath)
......@@ -132,7 +157,7 @@ end
# Trxmat(path; tags=tags)
# end
# Preload a file of known type `T`. Keyword arguments are accepted but are not forwarded
# to the constructor.
preload(T::DataType, path::String; kwargs...) = T(path)

# Preload a file of unknown type: the format is guessed first, with an error thrown if it
# cannot be determined. `shallow` is consumed here and passed to `guessfileformat`; the
# remaining keyword arguments are passed on to the typed method.
# This is the only definition for this signature, so no method is overwritten.
function preload(path::String; shallow::Bool=false, kwargs...)
    T = guessfileformat(path; fail=true, shallow=shallow)
    return preload(T, path; kwargs...)
end
function drop_record!(file::PreloadedFile, record::Symbol)
@assert isempty(file.timeseries)
......@@ -245,7 +270,7 @@ function getrun′(file)
end
# Generic method: `shallow` is accepted but has no effect.
getrun(file; shallow::Bool=true) = getrun′(file)

# JSON label files: only shallow access is implemented, and `shallow=false` throws.
function getrun(file::JSONLabels; shallow::Bool=true)
    # single guard; a second identical guard after this one could never be reached
    shallow || throw("deep load not implemented for json label files")
    return getrun′(file)
end
......@@ -443,4 +468,63 @@ function load!(file::JSONLabels)
return file
end
"""
labelledfiles(repository, chunks=false)
labelledfiles(...; selection_rule=nothing, shallow=false)
List all labelled files (*trx.mat* and *.label* files) found in a repository.
If multiple JSON *.label* files with common data dependencies are found in a directory,
only the last *.label* file is listed, unless `chunks` is `true` (the various files are
assumed to address different tracks in the same data file).
Data dependencies are also omitted.
The returned files are of type `PreloadedFile`.
`selection_rule` is a boolean function that takes a filename as input argument, and returns
`true` if the file is to be included. This can be used to speed up the filtering of labelled
files.
The `shallow` argument is passed to [`guessfileformat`](@ref).
"""
function labelledfiles(repository::String=".", chunks::Bool=false;
        selection_rule=nothing, shallow=false)
    # one vector of files per visited directory, flattened on return
    files = Vector{PreloadedFile}[]
    for (parent, _, children) in walkdir(repository)
        # label files of the current directory, grouped by data dependency
        deps = Dict{String, Vector{JSONLabels}}()
        found = PreloadedFile[]
        for file in children
            isnothing(selection_rule) || selection_rule(file) || continue
            preloaded = try
                preload(joinpath(parent, file); shallow=shallow)
            catch
                # best effort: a file that cannot be preloaded (e.g. unknown format) is
                # ignored
                continue
            end
            if preloaded isa Trxmat || preloaded isa JSONLabels
                push!(found, preloaded)
                if preloaded isa JSONLabels
                    for dep in getdependencies(preloaded)
                        push!(get!(deps, dep, JSONLabels[]), preloaded)
                    end
                end
            end
        end
        # label files superseded by a later label file with a common dependency
        conflicting = JSONLabels[]
        if !chunks
            for (dep, group) in pairs(deps)
                if 1 < length(group)
                    # all but the last label file of the group are discarded
                    append!(conflicting, group[1:end-1])
                    @info "Multiple label files for a same data dependency" dir=parent dependency=basename(dep) labelfiles=[basename(f.source) for f in group]
                end
            end
        end
        # Omit the data dependencies themselves, and the superseded label files.
        # ASCII `in` is used for the membership tests rather than the `∉` operator.
        filter!(f -> !(f.source in keys(deps)) && !(f in conflicting), found)
        push!(files, found)
    end
    return Iterators.flatten(files)
end
end
......@@ -616,5 +616,18 @@ if all_tests || "Formats" in ARGS
end
anyfile = make_test_data("chore_auto_labels")
dir = dirname(anyfile)
@test Set([basename(f.source) for f in labelledfiles(dir)]) == Set(["chore_auto.labels", "fimtrack_manual.labels", "trxmat_exported.labels"])
testfile1 = joinpath(dir, "empty_test_file.json")
testfile2 = joinpath(dir, "copy.json")
Base.Filesystem.touch(testfile1)
Base.Filesystem.cp(anyfile, testfile2; force=true)
files = @test_logs (:info, "Multiple label files for a same data dependency") match_mode=:any labelledfiles(dir)
@test Set([basename(f.source) for f in files]) == Set(["copy.json", "fimtrack_manual.labels", "trxmat_exported.labels"])
for tmp in (testfile1, testfile2)
Base.Filesystem.rm(tmp)
end
end
end
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment