From 31b384c5f55a328bea9f1d6a3eb0dba9e514a125 Mon Sep 17 00:00:00 2001 From: Timothe Jost <timothe.jost@wanadoo.fr> Date: Fri, 25 Oct 2024 23:51:08 +0200 Subject: [PATCH] small changes in multisession placeholder, accessors, and transfer metrics of the file push / pulling system --- requirements.txt | 2 ++ src/one/__init__.py | 2 +- src/one/api.py | 17 +++++++++++------ src/one/files.py | 13 ++++++++++++- src/one/pd_accessors.py | 37 ++++++++++++++++++++++++++++++++++++- 5 files changed, 62 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 424d8dd..738f099 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,5 @@ PyYAML>=6.0.1 PyYAML>=6.0.1 Requests>=2.31.0 tqdm>=4.66.1 +pint +rich diff --git a/src/one/__init__.py b/src/one/__init__.py index 62bffa1..02426ac 100644 --- a/src/one/__init__.py +++ b/src/one/__init__.py @@ -1,5 +1,5 @@ """The Open Neurophysiology Environment (ONE) API""" -__version__ = "2.1.14" +__version__ = "2.1.15" from . import api from .api import ONE diff --git a/src/one/api.py b/src/one/api.py index e77d0d9..4a372d4 100644 --- a/src/one/api.py +++ b/src/one/api.py @@ -68,26 +68,31 @@ class MultiSessionPlaceholder(pd.core.series.Series): *args, project="Adaptation", analysis_group="default", - data_path="", + remote_path="", data_repository=None, + mode="remote", + date="2000-01-01", **kwargs, ): super().__init__(*args, **kwargs) if data_repository is not None: if data_repository == "local": - data_path = one.params.get().LOCAL_ROOT + remote_path = one.params.get().LOCAL_ROOT else: - data_path = self._get_connector().alyx.rest("data-repository", "read", data_repository)["data_path"] - if data_path == "": + remote_path = self._get_connector().alyx.rest("data-repository", "read", data_repository)["data_path"] + + if remote_path == "": raise ValueError( "Data path cannot be empty string. 
Must either be obtained by supplying data_repository argument, " "or data_path directly" ) - data_path = os.path.normpath(data_path) + remote_path = os.path.normpath(remote_path) self["rel_path"] = os.path.join("multisession", analysis_group) - self["path"] = os.path.join(data_path, self["rel_path"]) + self["local_path"] = os.path.join(os.path.normpath(one.params.get().LOCAL_ROOT), self["rel_path"]) + self["remote_path"] = os.path.join(remote_path, self["rel_path"]) + self["path"] = self["remote_path"] if mode == "remote" else self["local_path"] self["alias"] = analysis_group self["u_alias"] = analysis_group self["projects"] = [project] diff --git a/src/one/files.py b/src/one/files.py index e42dba0..bef8501 100644 --- a/src/one/files.py +++ b/src/one/files.py @@ -226,7 +226,14 @@ class FileTransferManager: transfers_infos = [] for (source, destination), transfers in self.results.groupby(["source_volume", "destination_volume"]): free_space = shutil.disk_usage(destination).free * Quantity("bytes") - transfer_space = transfers["source_filesize"].sum() + + transfered_files = transfers[transfers["decision"] == "transfer"] + overwritten_files = transfers[transfers["decision"] == "overwrite"] + + transfer_space = ( + overwritten_files["destination_filesize"] - overwritten_files["source_filesize"] + ).sum() + transfered_files["source_filesize"].sum() + session_nb = len(transfers.session.unique()) files_nb = len(transfers) @@ -431,6 +438,8 @@ class FileTransferManager: destination_creation_date = destination_stat.st_birthtime destination_modification_date = destination_stat.st_mtime + destination_filesize = destination_stat.st_size * Quantity("bytes") + destination_date = max(destination_creation_date, destination_modification_date) if (difference := abs(source_date - destination_date)) < policies["close_dates_threshold"]: @@ -457,11 +466,13 @@ class FileTransferManager: decision = policies["no_file_exists"] warnings = "File is not existing on the destination, transfering 
is absolutely okay." destination_date = None + destination_filesize = 0 * Quantity("bytes") record = dict( source_filepath=source_filepath, destination_filepath=destination_filepath, source_filesize=source_filesize, + destination_filesize=destination_filesize, relative_filepath=relative_filepath, destination_exists=destination_exists, source_date=source_date, diff --git a/src/one/pd_accessors.py b/src/one/pd_accessors.py index 249bf25..506bece 100644 --- a/src/one/pd_accessors.py +++ b/src/one/pd_accessors.py @@ -8,12 +8,15 @@ from pathlib import Path class AlyxDataframeAcessorsRegistry: def __init__(self, pandas_obj) -> None: self.pandas_obj = pandas_obj - self._validate(self.pandas_obj) @property def datasets(self): return DatasetsDataframeAcessor(self.pandas_obj) + @property + def sessions(self): + return SessionsDataframeAccessor(self.pandas_obj) + @pd.api.extensions.register_series_accessor("alyx") class AlyxSeriesAcessorsRegistry: @@ -32,12 +35,44 @@ class AlyxSeriesAcessorsRegistry: def files(self): return FilesSeriesAccessor(self.pandas_obj) + @property + def session(self): + return SessionSeriesAccessor(self.pandas_obj) + class PlotSeriesAcessor: def __init__(self, pandas_obj) -> None: self.pandas_obj = pandas_obj +class SessionsDataframeAccessor: + + def __init__(self, pandas_obj) -> None: + self._obj: pd.DataFrame = pandas_obj + + def local_mode(self): + return self._obj.assign(path=self._obj["local_path"]) + + def remote_mode(self): + return self._obj.assign(path=self._obj["remote_path"]) + + +class SessionSeriesAccessor: + + def __init__(self, pandas_obj) -> None: + self._obj: pd.Series = pandas_obj + + def local_mode(self): + series = self._obj.copy() + series["path"] = series["local_path"] + return series + + def remote_mode(self): + series = self._obj.copy() + series["path"] = series["remote_path"] + return series + + class FilesSeriesAccessor: def __init__(self, pandas_obj) -> None: -- GitLab