Skip to content
Snippets Groups Projects
Commit fb03e3c1 authored by François  LAURENT's avatar François LAURENT
Browse files

generation scripts and supporting data

parent 0f9739c0
No related branches found
No related tags found
No related merge requests found
Pipeline #89694 passed
This diff is collapsed.
This diff is collapsed.
# run with julia --project=. --threads 18
using TaggingBackends.LarvaDatasets#, TaggingBackends.Trxmat2HDF5
using Random
using HDF5
# Input directory holding the spine HDF5 files. The local-path value was dead
# code (immediately overwritten); kept as a comment per the file's toggle style.
#datadir = "data/interim"
datadir = "/media/flaurent/Elements1/spineh5"
# Trackers to process. Was `["t15" "t5"]` (a 1x2 row matrix); a Vector is the
# idiomatic container here and iterates in the same order.
trackers = ["t15", "t5"]
# Behavior labels present in the datasets.
labels = ["back", "cast", "hunch", "roll", "run", "stop_large"]
# Toggle which dataset variants to generate below.
interpolated = true
recovered = false
#interpolated = false
#recovered = true
# Map spine-HDF5 file records back to their original `trx.mat` paths under
# `data/raw`, keeping only those present on disk; missing files are logged.
# NOTE(review): relies on the file-level global `datadir`, and assumes each
# `f.source` starts with `datadir * "/"` and ends with "_spine.h5" — confirm
# against the producer of `files`.
function original(files)
    found = String[]
    sizehint!(found, length(files))
    for entry in files
        # Strip the datadir prefix (plus separator) and the "_spine.h5" suffix.
        relative = entry.source[2+length(datadir):end-length("_spine.h5")]
        candidate = joinpath("data/raw", relative, "trx.mat")
        if isfile(candidate)
            push!(found, candidate)
        else
            @warn "Cannot find file" file=candidate
        end
    end
    return found
end
# --- Pretraining dataset generation ---
# For each candidate window length and each tracker, draw a fixed random
# subset of 500 recordings and export HDF5 datasets for the enabled variants
# (interpolated and/or recovered). RNG re-seeding before each sampling step
# makes every selection reproducible in isolation.
#for third_win_length in (25,)# 50, 125) # recovered
for third_win_length in (10, 20, 50) # interp10
for tracker in trackers
#h5files = original(larvah5files(joinpath(datadir, tracker)))
# File list is read from a precomputed text file (one path per line)
# rather than scanned on disk.
h5files = readlines("data/$(tracker)-fullset.txt")
# Fixed seed so the same 500 files are selected on every run.
# NOTE(review): assumes the fullset lists at least 500 files — confirm.
Random.seed!(0b11010111001001101001110)
training_files = shuffle(h5files)[1:500]
if interpolated
# NOTE(review): read_pointers/write_larva_dataset_hdf5 come from
# TaggingBackends.LarvaDatasets; their exact semantics are not visible here.
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/pretrain")
dir = "data/$(tracker)-subset1/pretrain/interpolated"
# Re-seed so dataset sampling is reproducible independently of the shuffle above.
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=50000, fixmwt=false,
frameinterval=0.1)
# Record the contributing source files, with the "data/raw/" prefix stripped.
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
end
if recovered
# labelpointers is freed by write_larva_dataset_hdf5
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/pretrain")
dir = "data/$(tracker)-subset1/pretrain/recovered"
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=50000, fixmwt=true)
# Record the contributing source files, with the "data/raw/" prefix stripped.
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
end
end
end
#exit()
# --- Training/test dataset generation ---
# Window length covering 5 s at the 10 Hz interpolated frame rate,
# rounded up over 3 windows: cld(50, 3) == ceil(Int, 50/3) == 17.
#third_win_length = ceil(Int, 5 * 25 / 3) # 5 s, 25 Hz (recovered)
third_win_length = cld(5 * 10, 3) # 5 s, 10 Hz (interp10)
# For each tracker: deterministically select 600 files, split 500/100 into
# train/test, then export HDF5 datasets for the enabled variants. The RNG is
# re-seeded before every sampling step so each output is reproducible on its own.
for tracker in trackers
#h5files = original(larvah5files(joinpath(datadir, tracker)))
# Precomputed file list, one path per line.
h5files = readlines("data/$(tracker)-fullset.txt")
# Fixed seed so the same 600 files are selected on every run.
# NOTE(review): assumes the fullset lists at least 600 files — confirm.
Random.seed!(0b11010111001001101001110)
selectedfiles = shuffle(h5files)[1:600]
# Disjoint split: first 500 for training, remaining 100 for test.
training_files = selectedfiles[1:500]
test_files = selectedfiles[501:end]
if interpolated
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/test")
dir = "data/$(tracker)-subset1/test/interpolated"
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, test_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=5000, fixmwt=false,
frameinterval=0.1)
# Record the contributing source files, with the "data/raw/" prefix stripped.
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in test_files])
end
end
if recovered
# labelpointers is freed by write_larva_dataset_hdf5
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/test")
dir = "data/$(tracker)-subset1/test/recovered"
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, test_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=5000, fixmwt=true)
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in test_files])
end
end
if interpolated
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/train")
dir = "data/$(tracker)-subset1/train/interpolated"
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=50000, fixmwt=false,
frameinterval=0.1)
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
end
if recovered
pointers = LarvaDatasets.read_pointers("data/$(tracker)-subset1/train")
dir = "data/$(tracker)-subset1/train/recovered"
Random.seed!(0b11010111001001101001110)
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
#labels=labels,
labelpointers=pointers,
sample_size=50000, fixmwt=true)
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
end
end
# run with julia --project=. --threads 18
using TaggingBackends.LarvaDatasets#, TaggingBackends.Trxmat2HDF5
using Random
using HDF5
# Input directory holding the spine HDF5 files. The local-path value was dead
# code (immediately overwritten); kept as a comment per the file's toggle style.
#datadir = "data/interim"
datadir = "/media/flaurent/Elements1/spineh5"
# Behavior labels passed to the dataset writer below.
labels = ["back", "cast", "hunch", "roll", "run", "stop_large"]
# Toggle which dataset variants to generate.
interpolated = true
recovered = false
# Translate spine-HDF5 records to the corresponding `trx.mat` paths under
# `data/raw`; records whose target file is absent are skipped with a warning.
# NOTE(review): depends on the file-level global `datadir` and on `f.source`
# being "<datadir>/<relpath>_spine.h5" — verify against the caller.
function original(files)
    kept = String[]
    sizehint!(kept, length(files))
    for record in files
        # Drop the "<datadir>/" prefix and the "_spine.h5" suffix.
        stem = record.source[2+length(datadir):end-length("_spine.h5")]
        target = joinpath("data/raw", stem, "trx.mat")
        if !isfile(target)
            @warn "Cannot find file" file=target
            continue
        end
        push!(kept, target)
    end
    return kept
end
third_win_length = 20 # 2 s, 10 Hz (interp10)
# Full file lists, one path per line, precomputed per tracker.
t5files = readlines("data/t5-fullset.txt")
t15files = readlines("data/t15-fullset.txt")
# Fixed seed; both in-place shuffles draw from the same deterministic stream.
Random.seed!(0b1101011100100110100111)
shuffle!(t5files)
shuffle!(t15files)
# 2500 training + 500 test files per tracker.
# Bug fix: the test slices previously started at 2500, so file #2500 of each
# tracker appeared in BOTH the training and the test set (train/test leakage).
# NOTE(review): assumes each fullset lists at least 3000 files — confirm.
training_files = vcat(t5files[1:2500], t15files[1:2500])
test_files = vcat(t5files[2501:3000], t15files[2501:3000])
# Export interpolated train and test datasets for the combined t5+t15 subset.
# The RNG is re-seeded before each write so each output is reproducible alone.
if interpolated
#pointers = LarvaDatasets.read_pointers("data/t5-t15-subset2/train")
dir = "data/t5-t15-subset2/train/interpolated"
Random.seed!(0b1101011100100110100111)
# Labels are passed explicitly here (unlike script 1, which used pointers).
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
labels=labels,
#labelpointers=pointers,
sample_size=100000, fixmwt=false,
frameinterval=0.1)
# Record the contributing source files, with the "data/raw/" prefix stripped.
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
#pointers = LarvaDatasets.read_pointers("data/t5-t15-subset2/test")
dir = "data/t5-t15-subset2/test/interpolated"
Random.seed!(0b1101011100100110100111)
dataset = write_larva_dataset_hdf5(dir, test_files, third_win_length;
labels=labels,
#labelpointers=pointers,
sample_size=20000, fixmwt=false,
frameinterval=0.1)
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in test_files])
end
end
# Export recovered train and test datasets for the combined t5+t15 subset.
# Currently dead code (`recovered = false` above), kept for toggling.
if recovered
#pointers = LarvaDatasets.read_pointers("data/t5-t15-subset2/train")
dir = "data/t5-t15-subset2/train/recovered"
Random.seed!(0b1101011100100110100111)
# Consistency fix: `labels=labels` was commented out only in this call while
# every other write_larva_dataset_hdf5 call in this script passes it, which
# would have left the train/recovered dataset with the callee's default label
# handling — NOTE(review): confirm this was not intentional.
dataset = write_larva_dataset_hdf5(dir, training_files, third_win_length;
labels=labels,
#labelpointers=pointers,
sample_size=100000, fixmwt=true)
# Record the contributing source files, with the "data/raw/" prefix stripped.
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in training_files])
end
#pointers = LarvaDatasets.read_pointers("data/t5-t15-subset2/test")
dir = "data/t5-t15-subset2/test/recovered"
Random.seed!(0b1101011100100110100111)
dataset = write_larva_dataset_hdf5(dir, test_files, third_win_length;
labels=labels,
#labelpointers=pointers,
sample_size=20000, fixmwt=true)
h5open(dataset, "cw") do h5
write(h5, "files", [f[length("data/raw/")+1:end] for f in test_files])
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment