Skip to content
Snippets Groups Projects
Commit 011f8b51 authored by François  LAURENT's avatar François LAURENT
Browse files

#72 complete

parent 75710af7
Branches
Tags
No related merge requests found
Pipeline #92888 passed
......@@ -42,32 +42,51 @@ end
"""
    resetdata(tagger, dir)

Resolve the data directory of `tagger` designated by `dir` with `datadir`, pass it to
`reset`, and return whatever `reset` returns.
"""
function resetdata(tagger::Tagger, dir::String)
    target = datadir(tagger, dir)
    return reset(target)
end
"""
push(tagger, filepath)
push(tagger, dirpath)
push(tagger, filelistfile)
Copy a data file into the tagger's raw data directory,
or link a data directory as the tagger's raw data directory,
or read relative filepaths from a .txt file and copy all listed files
into the tagger's raw data directory, preserving the directory structure
in the relative paths.
"""
function push(tagger::Tagger, inputdata::String)
destination = nothing
raw_data_dir = datadir(tagger, "raw")
if isdir(inputdata)
inputdata = realpath(inputdata) # strip the end slashes
destination = normpath(joinpath(raw_data_dir, basename(inputdata)))
rm(raw_data_dir; force=true, recursive=true)
destination = raw_data_dir
symlink(inputdata, destination)
else
file = abspath(inputdata)
files = [file]
dir_stem, ext = splitext(file)
if ext == ".spine"
file′= dir_stem * ".outline"
if isfile(file′)
push!(files, file′)
end
elseif ext == ".outline"
file′= dir_stem * ".spine"
if isfile(file′)
push!(files, file′)
end
else
for file′ in Formats.getdependencies(file)
push!(files, file′)
elseif endswith(inputdata, ".txt")
files_by_dir = Dict{String, Vector{String}}()
for file in readlines(inputdata)
parent = dirname(file)
push!(get!(files_by_dir, parent, String[]), abspath(file))
end
for (dir, files) in pairs(files_by_dir)
files = filter(files)
for file in files
@debug "Pushing file to backend" backend=basename(tagger.backend_dir) instance=tagger.model_instance file=file
dest_dir = normpath(joinpath(raw_data_dir, dir))
mkpath(dest_dir)
dest_file = joinpath(dest_dir, basename(file))
src_file = normpath(file)
if dest_file != src_file
open(src_file, "r") do f
open(dest_file, "w") do g
write(g, read(f))
end
end
end
end
end
else
file = abspath(inputdata)
files = filter([file])
for file in files
@info "Pushing file to backend" backend=basename(tagger.backend_dir) instance=tagger.model_instance file=file
dest_file = normpath(joinpath(raw_data_dir, basename(file)))
......@@ -85,16 +104,40 @@ function push(tagger::Tagger, inputdata::String)
return destination
end
"""
    filter(files)

Return a copy of `files` extended with the companion files of each entry.

For a `.spine` file, the `.outline` file with the same stem is appended if it exists on
disk, and vice versa. For any other extension, the files returned by
`Formats.getdependencies` are appended. The input entries keep their order and come
first; a companion or dependency that is already in the list is not appended again, so
that listing both a `.spine` file and its `.outline` file does not yield duplicates.
The input vector is left untouched.
"""
# NOTE(review): unless the enclosing module explicitly extends `Base.filter`, this
# definition shadows it inside the module — confirm that this is intended.
function filter(files::Vector{String})
    files′ = copy(files)
    # paths already present in the output, for constant-time duplicate checks
    listed = Set{String}(files)
    function pushnew!(file′)
        if file′ ∉ listed
            push!(listed, file′)
            push!(files′, file′)
        end
        return nothing
    end
    for file in files
        dir_stem, ext = splitext(file)
        if ext == ".spine"
            file′ = dir_stem * ".outline"
            isfile(file′) && pushnew!(file′)
        elseif ext == ".outline"
            file′ = dir_stem * ".spine"
            isfile(file′) && pushnew!(file′)
        else
            for file′ in Formats.getdependencies(file)
                pushnew!(file′)
            end
        end
    end
    return files′
end
function pull(tagger::Tagger, dest_dir::String)
proc_data_dir = datadir(tagger, "processed")
isdir(proc_data_dir) || throw("no processed data directory found")
dest_files= String[]
dest_dir = realpath(dest_dir) # strip end slash
dest_files = String[]
for (parent, _, files) in walkdir(proc_data_dir)
if !isempty(files)
parent = relpath(parent, proc_data_dir)
parent= joinpath(dest_dir, relpath(parent, proc_data_dir))
for file in files
src_file = normpath(joinpath(proc_data_dir, parent, file))
dest_file = normpath(joinpath(dest_dir, parent, file))
src_file = normpath(joinpath(parent, file))
dest_file = normpath(joinpath(parent, file))
if dest_file != src_file
open(src_file, "r") do f
open(dest_file, "w") do g
......
......@@ -27,6 +27,10 @@ Options:
--labels=<comma-separated-list> Comma-separated list of behavior tags/labels.
--pretrained-model=<instance> Name of the pretrained encoder (from `pretrained_models` registry).
Note:
<data-path> can be a path to a file or directory. In the case of `predict`,
<data-path> can also be a .txt file listing data files; one relative path per line.
"""
function main(args=ARGS; exit_on_error=true)
......@@ -121,6 +125,6 @@ function main(args=ARGS; exit_on_error=true)
datapath = abspath(data_path)
Taggers.push(tagger, datapath)
predict(tagger; skip_make_dataset=parsed_args["--skip-make-dataset"])
Taggers.pull(tagger, dirname(datapath))
Taggers.pull(tagger, isfile(datapath) ? dirname(datapath) : datapath)
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment