diff --git a/src/Taggers.jl b/src/Taggers.jl
index 77987c966c9c40edbd7bd80b11e4c021e42100b3..23dfa19dcb9105c7d435a7ba8d5c646e9f671c9f 100644
--- a/src/Taggers.jl
+++ b/src/Taggers.jl
@@ -17,17 +17,57 @@ end
 Tagger(backend_dir, model_instance) = Tagger(string(backend_dir), string(model_instance))
 
 function isolate(tagger)
-    mkpath(joinpath(tagger.backend_dir, "data", "raw"))
-    rawdatadir = mktempdir(joinpath(tagger.backend_dir, "data", "raw"); cleanup=false)
+    rawdatadir = joinpath(tagger.backend_dir, "data", "raw")
+    # mkpath (not mkdir): the directory may already exist and its parents may not
+    mkpath(rawdatadir)
+    rawdatadir = mktempdir(rawdatadir; cleanup=false)
     Tagger(tagger.backend_dir, tagger.model_instance, basename(rawdatadir),
            tagger.output_filenames)
 end
 
-isbackend(path) = isdir(joinpath(path, "models")) &&
-    isfile(joinpath(path, "pyproject.toml")) &&
-    isfile(joinpath(path, "poetry.lock"))
+# Name of the virtual environment directory looked up inside a backend directory.
+# NOTE(review): ENV is read when this line is evaluated; with precompilation the value
+# is presumably frozen at precompile time -- confirm this is intended.
+const envdir = get(ENV, "VENV_DIR", "venv")
+
+"""
+    isbackend(path)
+
+Return `true` if `path` contains a `models` directory, a `pyproject.toml` file, and
+either a `poetry.lock` file or a virtual environment (directory `envdir`) that provides
+both `python` and `tagging-backend`.
+"""
+function isbackend(path)
+    bindir = Sys.iswindows() ? "Scripts" : "bin"
+    return isdir(joinpath(path, "models")) &&
+        isfile(joinpath(path, "pyproject.toml")) &&
+        (isfile(joinpath(path, "poetry.lock")) ||
+         (isfile(joinpath(path, envdir, bindir, "python")) &&
+          isfile(joinpath(path, envdir, bindir, "tagging-backend"))))
+end
 isbackend(tagger::Tagger) = isbackend(tagger.backend_dir)
 
+"""
+    tagging_backend_command(path)
+
+Return the command that launches `tagging-backend` for the backend located at `path`:
+the virtual environment's `python` and `tagging-backend` script if both files exist,
+`poetry run tagging-backend` otherwise.
+"""
+function tagging_backend_command(path)
+    bindir = Sys.iswindows() ? "Scripts" : "bin"
+    # absolute paths: callers run the command with `dir` set to the backend directory,
+    # so a path relative to the current directory may no longer resolve
+    python = abspath(path, envdir, bindir, "python")
+    tagging_backend_script = abspath(path, envdir, bindir, "tagging-backend")
+    if isfile(python) && isfile(tagging_backend_script)
+        return `$python $tagging_backend_script`
+    else
+        return `poetry run tagging-backend`
+    end
+end
+tagging_backend_command(tagger::Tagger) = tagging_backend_command(tagger.backend_dir)
+
 modeldir(tagger::Tagger) = joinpath(tagger.backend_dir, "models", tagger.model_instance)
 
 datadir(tagger::Tagger, stage::String) = joinpath(tagger.backend_dir, "data", stage,
@@ -205,56 +245,42 @@ function parsekwargs!(args, kwargs)
     return args
 end
 
-function train(tagger::Tagger; pretrained_instance=nothing, kwargs...)
+# Run `tagging-backend <switch>` with the working directory set to `tagger.backend_dir`.
+# `extra` holds `option => value` pairs; a pair whose value is `nothing` is skipped.
+function run(tagger, switch, kwargs, extra...)
     args = ["--model-instance", tagger.model_instance]
-    if !isnothing(pretrained_instance)
-        push!(args, "--pretrained-model-instance")
-        push!(args, pretrained_instance)
+    # iterate `extra` itself: `pairs(extra)` would yield `index => pair` entries
+    for (p, v) in extra
+        if !isnothing(v)
+            push!(args, p)
+            push!(args, v)
+        end
     end
     if !isnothing(tagger.sandbox)
         push!(args, "--sandbox")
         push!(args, tagger.sandbox)
     end
     parsekwargs!(args, kwargs)
-    ret = run(Cmd(`poetry run tagging-backend train $(args)`; dir=tagger.backend_dir))
+    cmd = tagging_backend_command(tagger)
+    Base.run(Cmd(`$(cmd) $(switch) $(args)`; dir=tagger.backend_dir))
+end
+
+function train(tagger::Tagger; pretrained_instance=nothing, kwargs...)
+    ret = run(tagger, "train", kwargs,
+              "--pretrained-model-instance" => pretrained_instance)
     @assert isdir(modeldir(tagger))
     return ret
 end
 
-function predict(tagger::Tagger; kwargs...)
-    args = ["--model-instance", tagger.model_instance]
-    if !isnothing(tagger.sandbox)
-        push!(args, "--sandbox")
-        push!(args, tagger.sandbox)
-    end
-    parsekwargs!(args, kwargs)
-    run(Cmd(`poetry run tagging-backend predict $(args)`; dir=tagger.backend_dir))
-end
+predict(tagger::Tagger; kwargs...) = run(tagger, "predict", kwargs)
 
 function finetune(tagger::Tagger; original_instance=nothing, kwargs...)
-    args = ["--model-instance", tagger.model_instance]
-    if !isnothing(original_instance)
-        push!(args, "--original-model-instance")
-        push!(args, original_instance)
-    end
-    if !isnothing(tagger.sandbox)
-        push!(args, "--sandbox")
-        push!(args, tagger.sandbox)
-    end
-    parsekwargs!(args, kwargs)
-    ret = run(Cmd(`poetry run tagging-backend finetune $(args)`; dir=tagger.backend_dir))
+    ret = run(tagger, "finetune", kwargs,
+              "--original-model-instance" => original_instance)
     @assert isdir(modeldir(tagger))
     return ret
 end
 
-function embed(tagger::Tagger; kwargs...)
-    args = ["--model-instance", tagger.model_instance]
-    if !isnothing(tagger.sandbox)
-        push!(args, "--sandbox")
-        push!(args, tagger.sandbox)
-    end
-    parsekwargs!(args, kwargs)
-    run(Cmd(`poetry run tagging-backend embed $(args)`; dir=tagger.backend_dir))
-end
+embed(tagger::Tagger; kwargs...) = run(tagger, "embed", kwargs)
 
 end # module