From 5b9acb8ad40160788eb927836f38acf59ad7d902 Mon Sep 17 00:00:00 2001 From: Timothe Jost <timothe.jost@wanadoo.fr> Date: Fri, 3 May 2024 18:41:22 +0200 Subject: [PATCH] some additions to make multisession guess a bit smarter --- src/pypelines/__init__.py | 2 +- src/pypelines/pipes.py | 22 ++++++++++++++++++++-- src/pypelines/steps.py | 6 +++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/pypelines/__init__.py b/src/pypelines/__init__.py index 40b96c9..c8a9b26 100644 --- a/src/pypelines/__init__.py +++ b/src/pypelines/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.56" +__version__ = "0.0.57" from . import loggs from .pipes import * diff --git a/src/pypelines/pipes.py b/src/pypelines/pipes.py index aca5ce1..0f95a07 100644 --- a/src/pypelines/pipes.py +++ b/src/pypelines/pipes.py @@ -6,6 +6,8 @@ from .disk import BaseDiskObject from functools import wraps import inspect, hashlib +from pandas import DataFrame + from abc import ABCMeta, abstractmethod from typing import Callable, Type, Iterable, Protocol, TYPE_CHECKING, Literal, Dict @@ -208,7 +210,23 @@ class BasePipe(metaclass=ABCMeta): list(self.steps.values()), key=lambda item: item.get_level(selfish=True), reverse=reverse ) + highest_step = None + + if isinstance(session, DataFrame): + # if multisession, we assume we are trying to just load sessions + # that all have reached the same level of requirements. 
(otherwise, use generate) + # because of that, we use only the first session in the lot to search the highest loadable step + search_on_session = session.iloc[0] + else: + search_on_session = session + for step in ordered_steps: - if step.get_disk_object(session, extra).is_matching(): - return step.load(session, extra) + if step.get_disk_object(search_on_session, extra).is_matching(): + highest_step = step + + if highest_step is not None: # if we found one : it is not None + # we use the load wrapper, which will dispatch to multisession or not automatically, + # depending on session type (Series or DataFrame) + return highest_step.load(session, extra) + raise ValueError(f"Could not find a {self} object to load for the session {session.alias} with extra {extra}") diff --git a/src/pypelines/steps.py b/src/pypelines/steps.py index 3d71165..e624fc9 100644 --- a/src/pypelines/steps.py +++ b/src/pypelines/steps.py @@ -1,8 +1,9 @@ from functools import wraps, partial, update_wrapper from .loggs import loggedmethod, NAMELENGTH from .arguments import autoload_arguments -import logging, inspect +import logging, inspect +from pandas import DataFrame from dataclasses import dataclass from types import MethodType @@ -258,6 +259,9 @@ class BaseStep: ValueError: If the disk object does not match and has a status message. """ # print("extra in load wrapper : ", extra) + if isinstance(session, DataFrame): + return self.multisession.load(sessions=session, extras=extra) + if extra is None: extra = self.get_default_extra() # print("extra in load wrapper after None : ", extra) -- GitLab