Skip to content
Snippets Groups Projects
Commit 26a1e452 authored by Timothe Jost's avatar Timothe Jost
Browse files

almost working base feature set

parent 012b0095
No related branches found
No related tags found
No related merge requests found
Showing
with 175 additions and 24 deletions
No preview for this file type
Metadata-Version: 2.1
Name: pypelines
Version: 0.0.1
Summary: Framework to organize processing code outputs to/from disk, processing chaining and versioning with a common, easy-to-use API
Home-page: https://gitlab.pasteur.fr/haisslab/data-management/pypelines
Author: Timothé Jost-MOUSSEAU
Author-email: timothe.jost-mousseau@pasteur.com
License: MIT
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
README.md
setup.py
pypelines/__init__.py
pypelines/disk.py
pypelines/examples.py
pypelines/loggs.py
pypelines/multisession.py
pypelines/pickle_backend.py
pypelines/pipe.py
pypelines/pipeline.py
pypelines/sessions.py
pypelines/step.py
pypelines/versions.py
pypelines.egg-info/PKG-INFO
pypelines.egg-info/SOURCES.txt
pypelines.egg-info/dependency_links.txt
pypelines.egg-info/top_level.txt
tests/__init__.py
tests/tests.py
\ No newline at end of file
pypelines
tests
......@@ -3,4 +3,5 @@ __version__ = "0.0.1"
from .pipe import *
from .pipeline import *
from .step import *
from .disk import *
from .versions import *
\ No newline at end of file
No preview for this file type
File added
File added
No preview for this file type
File added
File added
No preview for this file type
File added
No preview for this file type
File added
No preview for this file type
File added
import os
import os, re
from . sessions import Session
import pickle
from typing import Callable, Type, Iterable, Protocol, TYPE_CHECKING
......@@ -16,9 +17,8 @@ class BaseDiskObject :
disk_version = None
disk_step = None
def __init__(self, session : Session, step : BaseStep, extra = "") -> None :
def __init__(self, session : Session, step : "BaseStep", extra = "") -> None :
self.step = None
self.session = session
self.step = step
self.extra = extra
......@@ -29,7 +29,7 @@ class BaseDiskObject :
"""sets self.disk_version and self.disk_step"""
...
def save(self, object):
def save(self, data : OutputData) -> None:
...
def load(self) -> OutputData:
......@@ -42,7 +42,6 @@ class BaseDiskObject :
def version_exist(self, session : Session):
"""returns True if the file found had a stamp for that step corresponding to the current version. False otherwise"""
return self.step.version == self.disk_version
class PickleObject(BaseDiskObject) :
......@@ -50,35 +49,88 @@ class PickleObject(BaseDiskObject) :
file_prefix = "preproc_data"
extension = "pickle"
current_suffixes = ""
remove = True
current_disk_file = None
def make_file_prefix_path(self):
prefix_path = self.file_prefix + "." + self.step.pipe_name
rigid_pattern = self.file_prefix
def parse_extra(self, extra):
    """Convert an ``extra`` tag into a regex fragment for a dotted suffix.

    Dots surrounding ``extra`` are stripped, and every dot remaining inside
    it is backslash-escaped. The fragment returned starts with an escaped
    dot so that it can be appended directly to a larger pattern.

    Args:
        extra (str): Optional tag, with or without leading/trailing dots.

    Returns:
        str: An escaped dot followed by the escaped tag, or an empty string
        when nothing is left once the surrounding dots are removed.
    """
    # NOTE(review): only dots are escaped; any other regex metacharacter in
    # ``extra`` is passed through unchanged.
    trimmed = extra.strip(".")
    if not trimmed:
        return ""
    escaped = trimmed.replace(".", r"\.")
    return r"\." + escaped
pattern = ""
def make_file_name_pattern(self):
if self.step.pipe.single_step :
pass
steps_patterns = []
if self.step.use_version :
pass
for key in sorted(self.step.pipe.steps.keys()):
step = self.step.pipe.steps[key]
steps_patterns.append( fr"(?:{step.step_name})" )
flexible_pattern = self.f
steps_patterns = "|".join(steps_patterns)
def check_disk(self):
search_path = os.path.join(self.session.path, self.collection)
version_pattern = fr"(?:\.(?P<version>[^\.]*))?"
step_pattern = fr"(?:\.(?P<step_name>{steps_patterns}){version_pattern})?"
extra = self.parse_extra(self.extra)
pattern = self.file_prefix + r"\." + self.step.pipe_name + step_pattern + extra + r"\." + self.extension
print(pattern)
return pattern
def get_file_name(self):
def save(self, object):
...
extra = self.parse_extra(self.extra)
version_string = "." + self.step.version if self.step.use_version else ""
filename = self.file_prefix + "." + self.step.pipe_name + "." + self.step.step_name + version_string + extra + "." + self.extension
return filename
def load(self) -> OutputData:
...
def check_disk(self):
    """Look in the session's collection folder for a file saved by this step.

    The folder is ``session.path`` joined with the ``collection`` components.
    Candidates are listed with ``files`` using the pattern returned by
    ``make_file_name_pattern()``; the ``step_name`` and ``version`` named
    groups of each candidate are then compared with the values expected for
    the current step (``version`` is expected to be ``None`` when the step
    does not use versions).

    Selection rules:
        * the first candidate whose groups equal the expected values wins;
        * otherwise, if exactly one candidate was found, it is accepted as a
          partial match and a warning is printed;
        * otherwise nothing is selected.

    Returns:
        bool: True when a file was selected, in which case
        ``self.current_disk_file`` holds its path; False otherwise.
    """
    search_path = os.path.join(self.session.path, os.path.sep.join(self.collection))
    print(search_path)
    matching_files = files(
        search_path,
        re_pattern=self.make_file_name_pattern(),
        relative=True,
        levels=0,
    )
    print(matching_files)

    # Nothing on disk that looks like an output of this pipe.
    if not len(matching_files):
        return False

    expected_values = {
        "step_name": self.step.step_name,
        "version": self.step.version if self.step.use_version else None,
    }
    pattern = re.compile(self.make_file_name_pattern())

    match_datas = []
    for candidate in matching_files:
        match = pattern.search(candidate)
        # TODO : catch here with KeyError and return an error that is more
        # explicit, saying key is not present in the pattern
        match_data = {key: match.group(key) for key in ("step_name", "version")}
        if match_data == expected_values:
            self.current_disk_file = os.path.join(search_path, candidate)
            return True
        match_datas.append(match_data)

    # No exact match: fall back on a lone candidate, refuse to guess otherwise.
    if len(match_datas) == 1:
        print(f"A single partial match was found. Please make sure it is consistant with expected behaviour. Expected : {expected_values} , Found : {match_datas[0]}")
        self.current_disk_file = os.path.join(search_path, matching_files[0])
        return True

    print(f"More than one partial match were found. Cannot auto select. Expected : {expected_values} , Found : {match_datas}")
    return False
def get_full_path(self):
    """Build the path at which this object's file is expected to be written.

    Returns:
        str: ``session.path`` joined with the ``collection`` components
        (glued together with the OS path separator) and the name returned
        by ``get_file_name()``.
    """
    session_root = self.session.path
    collection_dir = os.path.sep.join(self.collection)
    file_name = self.get_file_name()
    return os.path.join(session_root, collection_dir, file_name)
def save(self, data: "OutputData"):
    """Pickle ``data`` to the path given by ``get_full_path()``.

    Once the new file is written, the file previously tracked in
    ``self.current_disk_file`` is deleted when it is a different path and
    ``self.remove`` is truthy. ``self.current_disk_file`` is then updated
    to point at the new file.

    Args:
        data: Object to serialize with ``pickle.dump``.
    """
    target_path = self.get_full_path()
    with open(target_path, "wb") as stream:
        pickle.dump(data, stream)

    # Only drop the old file after the new one has been written.
    previous_path = self.current_disk_file
    is_stale = previous_path is not None and previous_path != target_path
    if is_stale and self.remove:
        os.remove(previous_path)

    self.current_disk_file = target_path
def load(self) -> "OutputData":
    """Unpickle and return the content of ``self.current_disk_file``.

    Returns:
        The object stored in the file.

    Raises:
        IOError: If ``self.current_disk_file`` is None, i.e. no file has
            been selected yet.
    """
    disk_file = self.current_disk_file
    if disk_file is None:
        raise IOError("Could not find a file to load. Either no file was found on disk, or you forgot to run 'check_disk()'")

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(disk_file, "rb") as stream:
        content = pickle.load(stream)
    return content
import natsort
from . import extract
def files(input_path, re_pattern = None, relative = False,levels = -1, get = "files", parts = "all", sort = True):
"""
......@@ -103,11 +155,11 @@ def files(input_path, re_pattern = None, relative = False,levels = -1, get = "fi
for subdir in os.listdir(_input_path):
fullpath = os.path.join(_input_path,subdir)
if os.path.isfile(fullpath):
if (get == "all" or get == "files") and (re_pattern is None or extract.qregexp(re_pattern,fullpath)):
if (get == "all" or get == "files") and (re_pattern is None or qregexp(re_pattern,fullpath)):
output_list.append(os.path.normpath(fullpath))
else :
if (get == "all" or get == "dirs" or get == "folders") and (re_pattern is None or extract.qregexp(re_pattern,fullpath)):
if (get == "all" or get == "dirs" or get == "folders") and (re_pattern is None or qregexp(re_pattern,fullpath)):
output_list.append(os.path.normpath(fullpath))
if current_level < levels:
current_level += 1
......@@ -129,4 +181,62 @@ def files(input_path, re_pattern = None, relative = False,levels = -1, get = "fi
def qregexp(regex, input_line, groupidx=None, matchid=None, case=False):
    """Return one match (or one group of one match) of ``regex`` in ``input_line``.

    Thin convenience wrapper around ``re.finditer``; the pattern is always
    searched with ``re.MULTILINE``.

    Tip:
        Design your patterns easily at [Regex101](https://regex101.com/)

    Args:
        regex (str): Regex pattern to search for.
        input_line (str): Text in which the pattern is searched.
        groupidx (int, optional): Zero-based index into ``match.groups()``.
            When None (default) the whole matched text is returned.
        matchid (int, optional): Zero-based index of the match to use when
            the pattern matches several times. When None (default) the
            first match is used.
        case (bool, optional): When True, ``re.IGNORECASE`` is added to the
            flags. Defaults to False (case-sensitive matching).

    Returns:
        The selected matched text or group content, or False when there is
        no such match or no group at ``groupidx``. A selected group that did
        not take part in the match comes back as None, as in
        ``match.groups()``.

    Warning:
        This function returns only one group/match.
    """
    flags = re.MULTILINE
    if case:
        flags |= re.IGNORECASE
    matches = re.finditer(regex, input_line, flags)

    # Matches are counted from 1 below, hence the shift of the 0-based id.
    wanted_match = 1 if matchid is None else matchid + 1

    for matchnum, match in enumerate(matches, start=1):
        if matchnum != wanted_match:
            continue
        if groupidx is None:
            return match.group()
        # First group whose position equals groupidx, False if none does.
        return next(
            (content for position, content in enumerate(match.groups()) if position == groupidx),
            False,
        )
    return False
\ No newline at end of file
......@@ -6,7 +6,7 @@ from .step import stepmethod
class ExamplePipeline(BasePipeline):
...
example_pipeline = ExamplePipeline()
example_pipeline = ExamplePipeline("example")
@example_pipeline.register_pipe
class ExamplePipe(PicklePipe):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment