Nyx / TaggingBackends, commit 93340eab

implements most of larvatagger.jl#94

Authored 2 years ago by François LAURENT
Parent: c0ce7942
Pipeline #97533 failed (stage: test), 2 years ago
Showing 3 changed files, with 82 additions and 15 deletions:

    src/LarvaDatasets.jl              +72 −13
    src/taggingbackends/explorer.py    +2  −1
    src/taggingbackends/main.py        +8  −1
src/LarvaDatasets.jl (+72 −13)

@@ -17,6 +17,7 @@ structure of *trx.mat* files, an alternative implementation is provided by module
 """
 using PlanarLarvae, PlanarLarvae.Formats, PlanarLarvae.Features, PlanarLarvae.MWT
+using PlanarLarvae.Datasets: coerce
 using Random
 using HDF5
 using Dates

@@ -264,14 +265,55 @@ function thresholdedcounts(counts; majorityweight=20)
 end
 
 function write_larva_dataset_hdf5(path, counts, files, refs,
         nsteps_before, nsteps_after;
-        fixmwt=false, frameinterval=nothing)
+        fixmwt=false, frameinterval=nothing, includeall=nothing,
+        )
     fixmwt && @warn "`fixmwt=true` is no longer supported"
     # this method mutates argument `refs`
-    refs′ = Tuple{Int, Int, Int, eltype(keys(counts))}[]
-    for (label, count) in pairs(counts)
-        for (i, j, k) in shuffle(refs[label])[1:count]
-            push!(refs′, (i, j, k, label))
-        end
-    end
+    T = eltype(keys(counts))
+    refs′ = Tuple{Int, Int, Int, T}[]
+    if !isnothing(includeall)
+        includeall = coerce(T, includeall)
+        if haskey(counts, includeall)
+            count = counts[includeall]
+            T′ = Vector{Tuple{Int, Int, Int, T}}
+            specialrefs = Dict{T, T′}()
+            for (i, j, k) in refs[includeall]
+                for l in keys(refs)
+                    if l != includeall
+                        m = findfirst(==((i, j, k)), refs[l])
+                        if !isnothing(m)
+                            push!(get!(specialrefs, l, T′()), (i, j, k, l))
+                            deleteat!(refs[l], m)
+                        end
+                    end
+                end
+            end
+            if !isempty(specialrefs)
+                @info "Explicit inclusions based on label \"$(includeall)\":" [Symbol(label) => length(refs″) for (label, refs″) in pairs(specialrefs)]...
+                for (label, count) in pairs(counts)
+                    label == includeall && continue
+                    if label in keys(specialrefs)
+                        refs″ = specialrefs[label]
+                        if count < length(refs″)
+                            refs″ = shuffle(refs″)[1:count]
+                        end
+                        refs′ = vcat(refs′, refs″)
+                        count = count - length(refs″)
+                    end
+                    if 0 < count
+                        for (i, j, k) in shuffle(refs[label])[1:count]
+                            push!(refs′, (i, j, k, label))
+                        end
+                    end
+                end
+            end
+        end
+    end
+    if isempty(refs′)
+        for (label, count) in pairs(counts)
+            for (i, j, k) in shuffle(refs[label])[1:count]
+                push!(refs′, (i, j, k, label))
+            end
+        end
+    end
     empty!(refs) # free memory
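
For readers coming from the Python side, the sampling step added above can be re-expressed as follows. This is an illustrative sketch, not repository code: `refs` maps each label to a list of (i, j, k) index triples, `counts` gives the number of segments to draw per label, and `includeall` names the secondary label, as in the Julia function.

    import random

    def select_refs(counts, refs, includeall=None):
        # Sketch of the new sampling logic in write_larva_dataset_hdf5.
        selected = []  # plays the role of refs′
        if includeall is not None and includeall in counts:
            # Reassign segments bearing the secondary label to the primary
            # label they also bear, removing them from the regular pool
            # (refs is mutated, as in the Julia method).
            specialrefs = {}
            for ijk in refs[includeall]:
                for label, pool in refs.items():
                    if label != includeall and ijk in pool:
                        specialrefs.setdefault(label, []).append(ijk + (label,))
                        pool.remove(ijk)
            if specialrefs:
                for label, count in counts.items():
                    if label == includeall:
                        continue
                    special = specialrefs.get(label, [])
                    if count < len(special):
                        special = random.sample(special, count)
                    selected.extend(special)
                    count -= len(special)
                    if count > 0:
                        # Draw the remainder at random, without replacement.
                        for ijk in random.sample(refs[label], count):
                            selected.append(ijk + (label,))
        if not selected:
            # No secondary label involved: plain per-label random sampling.
            for label, count in counts.items():
                for ijk in random.sample(refs[label], count):
                    selected.append(ijk + (label,))
        return selected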

@@ -474,6 +516,12 @@ Note that, if `input_data` lists all files, `labelledfiles` is not called and argument
 Similarly, if `labelpointers` is defined, `labelcounts` is not called and argument
 `timestep_filter` is not used.
 
+*New in version 0.10*: `includeall` specifies a secondary label for systematic inclusion in
+the dataset. The time segments with this secondary label are accounted for under the
+associated primary label, prior to applying the balancing rule. If the label specified by
+`includeall` is found in `labels`, it is considered as primary and `includeall` is ignored.
+Generally speaking, `labels` should not include any secondary label.
+
 Known issue: ASCII-compatible string attributes are ASCII encoded and deserialized as `bytes`
 by the *h5py* Python library.
 """

@@ -489,7 +537,8 @@ function write_larva_dataset_hdf5(output_dir::String,
         shallow=false,
         balance=true,
         fixmwt=false,
-        frameinterval=nothing)
+        frameinterval=nothing,
+        includeall="edited")
     files = if input_data isa String
         repository = input_data
         labelledfiles(repository, chunks; selection_rule=file_filter, shallow=shallow)

@@ -515,14 +564,23 @@ function write_larva_dataset_hdf5(output_dir::String,
         refs = labelpointers
         counts = Dict{String, Int}(label => length(pointers) for (label, pointers) in pairs(labelpointers))
     end
-    if !isnothing(labels)
-        labels′ = p -> string(p[1]) in labels
-        missing_labels = [label for label in labels if label ∉ keys(counts)]
-        if !isempty(missing_labels)
-            @warn "No occurences found for labels: \"$(join(missing_labels, "\", \""))\""
-        end
+    if isnothing(labels)
+        labels = collect(keys(counts))
+        if !isnothing(includeall) && includeall ∈ labels
+            labels = [label for label in labels if label != includeall]
+        end
+    else
+        if !isnothing(includeall) && includeall ∈ labels
+            includeall = nothing
+        end
+        labels′ = if isnothing(includeall)
+            p -> string(p[1]) in labels
+        else
+            p -> string(p[1]) in labels || string(p[1]) == includeall
+        end
         filter!(labels′, counts)
         filter!(labels′, refs)
         isempty(counts) && throw("None of specified labels were found")
     end
     if balance
         sample_sizes, total_sample_size = balancedcounts(counts, sample_size)

@@ -530,12 +588,13 @@ function write_larva_dataset_hdf5(output_dir::String,
         isnothing(sample_size) || @error "Argument sample_size not supported for the specified balancing strategy"
         sample_sizes, total_sample_size = thresholdedcounts(counts)
     end
-    @info "Sample sizes (observed, selected):" [Symbol(label) => (count, get(sample_sizes, label, 0)) for (label, count) in pairs(counts)]...
+    @info "Sample sizes (observed, selected):" [Symbol(label) => (get(counts, label, 0), get(sample_sizes, label, 0)) for label in labels]...
     date = Dates.format(Dates.now(), "yyyy_mm_dd")
     output_file = joinpath(output_dir, "larva_dataset_$(date)_$(window_length)_$(window_length)_$(total_sample_size).hdf5")
     write_larva_dataset_hdf5(output_file, sample_sizes, files, refs, nsteps_before, nsteps_after;
-        fixmwt=fixmwt,
-        frameinterval=frameinterval)
+        fixmwt=fixmwt,
+        frameinterval=frameinterval,
+        includeall=includeall)
     h5open(output_file, "cw") do h5
         attributes(h5["samples"])["len_traj"] = window_length
src/taggingbackends/explorer.py (+2 −1)

@@ -474,7 +474,7 @@ run `poetry add {pkg}` from directory: \n
     def generate_dataset(self, input_files,
             labels=None, window_length=20, sample_size=None, balance=True,
-            frame_interval=None):
+            include_all=None, frame_interval=None):
         """
         Generate a *larva_dataset hdf5* file in data/interim/{instance}/
         """

@@ -485,6 +485,7 @@ run `poetry add {pkg}` from directory: \n
                 labels=labels,
                 sample_size=sample_size,
                 balance=balance,
+                includeall=include_all,
                 frameinterval=frame_interval)
 
     def compile_trxmat_database(self, input_dir,
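
A hypothetical call with the new keyword; how the explorer object is obtained, and the label names, are assumptions for illustration. Note that `include_all` is forwarded to the Julia side as `includeall`:

    backend.generate_dataset(
        input_files,
        labels=["run", "cast"],      # hypothetical primary labels
        window_length=20,
        sample_size=None,
        balance=True,
        include_all="edited",        # secondary label to force-include
        frame_interval=None,
    )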
src/taggingbackends/main.py (+8 −1)

@@ -10,6 +10,7 @@ Usage: tagging-backend [train|predict] --model-instance <name>
     tagging-backend train ... --sample-size <N> --balancing-strategy <strategy>
     tagging-backend train ... --frame-interval <I> --window-length <T>
     tagging-backend train ... --pretrained-model-instance <name>
+    tagging-backend train ... --include-all <secondary-label>
     tagging-backend train ... --skip-make-dataset --skip-build-features
     tagging-backend predict ... --make-dataset --build-features --sandbox <token>
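
For example, manually edited segments could be force-included at training time with (the value "edited" matches the default `includeall` on the Julia side):

    tagging-backend train --model-instance <name> --include-all edited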

@@ -37,7 +38,7 @@ the `make_dataset` module is loaded and this may take quite some time due to
 dependencies (e.g. Julia FFI). The `--skip-make-dataset` option makes `train`
 truly skip this step; the corresponding module is not loaded.
 
-Since version 0.8, `predict` makes `--skip-make-dataset` and
+Since version 0.10, `predict` makes `--skip-make-dataset` and
 `--skip-build-features` the default behavior. As a counterpart, it admits
 arguments `--make-dataset` and `--build-features`.

@@ -68,6 +69,7 @@ def main(fun=None):
     pretrained_model_instance = None
     sandbox = False
     balancing_strategy = 'auto'
+    include_all = None
     unknown_args = {}
     k = 2
     while k < len(sys.argv):

@@ -113,6 +115,9 @@ def main(fun=None):
         elif sys.argv[k] == "--balancing-strategy":
             k = k + 1
             balancing_strategy = sys.argv[k]
+        elif sys.argv[k] == "--include-all":
+            k = k + 1
+            include_all = sys.argv[k]
         else:
             unknown_args[sys.argv[k].lstrip('-').replace('-', '_')] = sys.argv[k + 1]
             k = k + 1
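
The option parser is a plain scan over `sys.argv`, so `--include-all` takes its value from the next token. A simplified, self-contained re-expression of the loop (condensed to the new flag; not the actual `main.py` code):

    def parse_args(argv):
        include_all = None
        unknown_args = {}
        k = 2                  # argv[0] is the program, argv[1] the subcommand
        while k < len(argv):
            if argv[k] == "--include-all":
                k = k + 1
                include_all = argv[k]
            else:
                # unknown options are collected as keyword arguments
                unknown_args[argv[k].lstrip('-').replace('-', '_')] = argv[k + 1]
                k = k + 1
            k = k + 1
        return include_all, unknown_args

    include_all, extra = parse_args(["tagging-backend", "train",
                                     "--include-all", "edited"])
    assert include_all == "edited" and extra == {}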

@@ -146,6 +151,8 @@ def main(fun=None):
             make_dataset_kwargs["reuse_h5files"] = True
         elif reuse_h5files:
             logging.info("option --reuse-h5files is ignored in the absence of --trxmat-only")
+        if include_all:
+            make_dataset_kwargs["include_all"] = include_all
         backend._run_script(backend.make_dataset, **make_dataset_kwargs)
     if build_features:
         backend._run_script(backend.build_features)