diff --git a/src/taggingbackends/explorer.py b/src/taggingbackends/explorer.py
index 11bcae262d173edd9e0dfeb80cbdad6e3ab1bdde..ff726b88eaafd022ab9225649c93fa1ac31bc5fd 100644
--- a/src/taggingbackends/explorer.py
+++ b/src/taggingbackends/explorer.py
@@ -24,6 +24,7 @@ else:
logging.warning("Failed to set PyCall with compiled_modules=False")
try:
from julia import TaggingBackends
+ from julia import PlanarLarvae
except ImportError:
logging.warning(f"TaggingBackends not found in JULIA_PROJECT={JULIA_PROJECT}; \
please see https://gitlab.pasteur.fr/nyx/TaggingBackends#recommended-installation")
@@ -443,11 +444,16 @@ run `poetry add {pkg}` from directory: \n
files = dict(files_)
return files
- def prepare_labels(self, input_files):
+ def prepare_labels(self, input_files, allowed_file_extensions=None,
+ single_input=False):
if isinstance(input_files, dict):
input_files_and_labels = dict()
for parent in input_files.keys():
- input_files_and_labels[parent] = self.prepare_labels(input_files[parent])
+ input_files_and_labels[parent] = self.prepare_labels(
+ input_files[parent],
+ single_input=single_input,
+ allowed_file_extensions=allowed_file_extensions,
+ )
return input_files_and_labels
#
from .data.labels import Labels, labels_file_extension
@@ -456,44 +462,61 @@ run `poetry add {pkg}` from directory: \n
# note: these *metadata* files are pushed by LarvaTagger.jl and form an
# undocumented mechanism to preserve metadata that are generated
# on the Julia side.
- found = 0
+ metadata_found = 0
for file in list(input_files):
if file.name == "metadata":
input_files.remove(file)
with open(file, "r") as f:
labels.metadata = json.load(f)
- if found == 1:
+ if metadata_found == 1:
logging.warning("multiple metadata files found")
- found += 1
- assert 0 < len(input_files)
- # if any labels file is found, assume the other files are its data
- # dependencies, and exclude the labels file from the list of input files
- found = 0
- for file in list(input_files):
- if any([file.name.endswith(ext) for ext in labels_file_extension]):
- labels.input_labels = file
- input_files.remove(file)
- if found == 1:
- logging.warning("multiple label files found")
- found += 1
- assert 0 < len(input_files)
- if labels.metadata and 'filename' in labels.metadata:
- # ensure the file listed in the metadata comes first in the
- # input_files list
+ metadata_found += 1
+ # check whether an input data file has been designated
+ primary_input = None
+ if metadata_found and 'filename' in labels.metadata.keys():
primary_input = labels.metadata['filename']
for file in input_files:
if file.name == primary_input:
primary_input = file
break
if isinstance(primary_input, str):
- logging.warning(f'cannot find file: {primary_input}')
- elif input_files[0] is not primary_input:
- input_files = [file for file in input_files if file is not primary_input]
- input_files.insert(0, primary_input)
+ logging.warning(f'file not found: {primary_input}')
+ primary_input = None
+ # identify existing labels (label files only)
+ labels_found = 0
+ if primary_input is None:
+ for file in input_files:
+ if any(file.name.endswith(ext) for ext in labels_file_extension):
+ labels.input_labels = file
+ if labels_found == 1:
+ logging.warning("multiple label files found")
+ labels_found += 1
+ elif any(primary_input.name.endswith(ext) for ext in labels_file_extension):
+ labels.input_labels = primary_input
+ labels_found = 1
+ # filter out likely unsupported files
+ if allowed_file_extensions:
+ input_files = [f for f in input_files
+ if any(f.name.endswith(ext) for ext in allowed_file_extensions)]
+ # if a single input is requested and none was designated, select the first remaining file as primary input
+ assert 0 < len(input_files)
+ if single_input and primary_input is None:
+ primary_input = input_files[0]
+ logging.info(f'selecting file: {primary_input}')
+ input_files = [pathlib.Path(f.source)
+ for f in PlanarLarvae.Formats.find_associated_files(str(primary_input))]
+ # ensure the primary input file comes first in the list of input files
+ if not (primary_input is None or input_files[0] == primary_input):
+ input_files = [f for f in input_files if f != primary_input]
+ input_files.insert(0, primary_input)
#
labels.tracking = input_files
#
- if not labels.metadata:
+ if labels.metadata:
+ if labels.metadata.get('date_time', None) == 'NA':
+ logging.debug('discarding metadata entry "date_time"')
+ del labels.metadata['date_time']
+ else:
file = input_files[0]
labels.metadata = {'filename': file.name}
#