diff --git a/src/taggingbackends/explorer.py b/src/taggingbackends/explorer.py index 11bcae262d173edd9e0dfeb80cbdad6e3ab1bdde..ff726b88eaafd022ab9225649c93fa1ac31bc5fd 100644 --- a/src/taggingbackends/explorer.py +++ b/src/taggingbackends/explorer.py @@ -24,6 +24,7 @@ else: logging.warning("Failed to set PyCall with compiled_modules=False") try: from julia import TaggingBackends + from julia import PlanarLarvae except ImportError: logging.warning(f"TaggingBackends not found in JULIA_PROJECT={JULIA_PROJECT}; \ please see https://gitlab.pasteur.fr/nyx/TaggingBackends#recommended-installation") @@ -443,11 +444,16 @@ run `poetry add {pkg}` from directory: \n files = dict(files_) return files - def prepare_labels(self, input_files): + def prepare_labels(self, input_files, allowed_file_extensions=None, + single_input=False): if isinstance(input_files, dict): input_files_and_labels = dict() for parent in input_files.keys(): - input_files_and_labels[parent] = self.prepare_labels(input_files[parent]) + input_files_and_labels[parent] = self.prepare_labels( + input_files[parent], + single_input=single_input, + allowed_file_extensions=allowed_file_extensions, + ) return input_files_and_labels # from .data.labels import Labels, labels_file_extension @@ -456,44 +462,61 @@ run `poetry add {pkg}` from directory: \n # note: these *metadata* files are pushed by LarvaTagger.jl and form an # undocumented mechanism to preserve metadata that are generated # on the Julia side. - found = 0 + metadata_found = 0 for file in list(input_files): if file.name == "metadata": input_files.remove(file) with open(file, "r") as f: labels.metadata = json.load(f) - if found == 1: + if metadata_found == 1: logging.warning("multiple metadata files found") - found += 1 - assert 0 < len(input_files) - # if any labels file is found, assume the other files are its data - # dependencies, and exclude the labels file from the list of input files - found = 0 - for file in list(input_files): - if any([file.name.endswith(ext) for ext in labels_file_extension]): - labels.input_labels = file - input_files.remove(file) - if found == 1: - logging.warning("multiple label files found") - found += 1 - assert 0 < len(input_files) - if labels.metadata and 'filename' in labels.metadata: - # ensure the file listed in the metadata comes first in the - # input_files list + metadata_found += 1 + # check whether an input data file has been designated + primary_input = None + if metadata_found and 'filename' in labels.metadata.keys(): primary_input = labels.metadata['filename'] for file in input_files: if file.name == primary_input: primary_input = file break if isinstance(primary_input, str): - logging.warning(f'cannot find file: {primary_input}') - elif input_files[0] is not primary_input: - input_files = [file for file in input_files if file is not primary_input] - input_files.insert(0, primary_input) + logging.warning(f'file not found: {primary_input}') + primary_input = None + # identify existing labels (label files only) + labels_found = 0 + if primary_input is None: + for file in input_files: + if any(file.name.endswith(ext) for ext in labels_file_extension): + labels.input_labels = file + if labels_found == 1: + logging.warning("multiple label files found") + labels_found += 1 + elif any(primary_input.name.endswith(ext) for ext in labels_file_extension): + labels.input_labels = primary_input + labels_found = 1 + # filter out likely unsupported files + if allowed_file_extensions: + input_files = [f for f in input_files + if any(f.name.endswith(ext) for ext in allowed_file_extensions)] + # force-identify the input data files + assert 0 < len(input_files) + if single_input and primary_input is None: + primary_input = input_files[0] + logging.info(f'selecting file: {primary_input}') + input_files = [pathlib.Path(f.source) + for f in PlanarLarvae.Formats.find_associated_files(str(primary_input))] + # ensure the primary input file comes first in the list of input files + if not (primary_input is None or input_files[0] == primary_input): + input_files = [f for f in input_files if f != primary_input] + input_files.insert(0, primary_input) # labels.tracking = input_files # - if not labels.metadata: + if labels.metadata: + if labels.metadata.get('date_time', None) == 'NA': + logging.debug('discarding metadata entry "date_time"') + del labels.metadata['date_time'] + else: file = input_files[0] labels.metadata = {'filename': file.name} #