diff --git a/larva/io/chore/backend/dataframe/__init__.py b/larva/io/chore/backend/dataframe/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/larva/io/chore/backend/dataframe/check/spine_outline_sync.py b/larva/io/chore/backend/dataframe/check/spine_outline_sync.py index 2341f3d3a8984573ca12903681b630e11422fd53..37e8384e6affa87af7bcbf62d09e90dbf5bcb355 100644 --- a/larva/io/chore/backend/dataframe/check/spine_outline_sync.py +++ b/larva/io/chore/backend/dataframe/check/spine_outline_sync.py @@ -3,14 +3,17 @@ from larva.qc import qc_check @qc_check(input_type='datasource') def spine_outline_sync(ctrl, datasource): - from ..spine_outline import SpineOutline + from larva.io.chore.spine_outline import ( + find_associated_file, load_spine, load_outline, + ) file1 = datasource - file2 = SpineOutline(file1).find_associated_file() + file2 = find_associated_file(file1) if file2 is None: ctrl.report(desc='no associated file found') else: - df1 = SpineOutline.load(file1) - df2 = SpineOutline.load(file2) + # load_outline should work with spine files + df1 = load_outline(file1) + df2 = load_outline(file2) nrows1 = df1.shape[0] nrows2 = df2.shape[0] if nrows1 != nrows2: diff --git a/larva/io/chore/backend/dataframe/purepy.py b/larva/io/chore/backend/dataframe/purepy.py index 26d6e0956f4f321ebefd4c5d6b6f60f1b31955d9..8979c70eca4dedc939973271da0c42ad39431bdd 100644 --- a/larva/io/chore/backend/dataframe/purepy.py +++ b/larva/io/chore/backend/dataframe/purepy.py @@ -102,7 +102,7 @@ def rtrim_nan(filt): trimmed = filt[:stop] return trimmed -def write_outline(filepath, df, append=False, float_format='.3f'): +def save_outline(filepath, df, append=False, float_format='.3f'): if float_format.startswith('%'): float_format = float_format[1:] date_time, larva_id = df['date_time'], df['larva_id'] @@ -116,10 +116,11 @@ def write_outline(filepath, df, append=False, float_format='.3f'): f.write(fmt.format(newline, date_time, larva_id, *points)) newline = '\n' -from .check import \ +from .check import ( spine_outline_default_qc_checks, spine_specific_default_qc_checks, outline_specific_default_qc_checks, + ) from larva.qc.file import QCFileBackend @@ -135,5 +136,5 @@ outline_backend = QCFileBackend( spine_outline_default_qc_checks + outline_specific_default_qc_checks, ) -__all__ = ['load_outline', 'write_outline', 'read_larvae', +__all__ = ['load_outline', 'save_outline', 'read_larvae', 'spine_backend', 'outline_backend'] diff --git a/larva/io/chore/spine_outline.py b/larva/io/chore/spine_outline.py index 734d62abbc174700fe5b54c89075937e0a7184d2..29fde70d3e438d61d0b7b299904d4372b234107e 100644 --- a/larva/io/chore/spine_outline.py +++ b/larva/io/chore/spine_outline.py @@ -1,5 +1,5 @@ import os -from larva.qc.file import QCFile +from larva.qc.file import QCFile, load, check, save from .backend.dataframe import purepy as default @@ -41,14 +41,14 @@ class Outline(SpineOutline): SpineOutline.__init__(self, filepath) self.backend = default.outline_backend -load_spine = Spine.load -load_outline = Outline.load +load_spine = load(Spine) +load_outline = load(Outline) -check_spine = Spine.check -check_outline = Outline.check +check_spine = check(Spine) +check_outline = check(Outline) -save_spine = Spine.save -save_outline = Outline.save +save_spine = save(Spine) +save_outline = save(Outline) __all__ = [ 'Spine', diff --git a/larva/io/trx/backend/__init__.py b/larva/io/trx/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/larva/io/trx/backend/hdf5storage.py b/larva/io/trx/backend/hdf5storage.py new file mode 100644 index 0000000000000000000000000000000000000000..f6a405c74ad62e337e0dc6b2f14aa9185d9e6ff2 --- /dev/null +++ b/larva/io/trx/backend/hdf5storage.py @@ -0,0 +1,22 @@ +import hdf5storage + +def load_trx(filepath, varnames=[]): + if varnames: + if isinstance(varnames, str): + varnames = varnames.split() + trx = hdf5storage.loadmat(filepath, + variable_names=[ 'trx/'+varname for varname in varnames ]) + else: + trx = hdf5storage.loadmat(filepath) + trx = { varname[4:]: trx[varname] for varname in trx } + return trx + +from larva.qc.file import QCFileBackend + +trx_backend = QCFileBackend( + load_trx, + NotImplemented, + [], + ) + +__all__ = ['load_trx', 'trx_backend'] diff --git a/larva/io/trx/trx_mat.py b/larva/io/trx/trx_mat.py index 8ff910c310ca52de1c8d75ef37e4a45f6d6588ec..11190138ddd561befdf4cd978386e63eb8eefc56 100644 --- a/larva/io/trx/trx_mat.py +++ b/larva/io/trx/trx_mat.py @@ -1,14 +1,19 @@ -from larva.qc.file import QCFile -import numpy as np - +from larva.qc.file import QCFile, asfun, load, check, save import h5py -import hdf5storage +from .backend import hdf5storage as default class Trx(QCFile): + __slots__ = () + + def __init__(self, filepath=None): + QCFile.__init__(self, filepath) + self.backend = default.trx_backend - def _list_varnames(self): + def list_varnames(self): filepath = self.filepath + if not filepath: + raise ValueError('filepath is not defined') try: store = h5py.File(filepath, 'r') except OSError: @@ -19,22 +24,10 @@ class Trx(QCFile): finally: store.close() - @classmethod - def list_varnames(cls, filepath=None): - if isinstance(cls, type): - if filepath is None: - raise ValueError('filepath is not defined') - else: - return Trx(filepath)._list_varnames() - else: - self = cls - return self._list_varnames() - - -list_trx_varnames = Trx.list_varnames -load_trx = Trx.load -check_trx = Trx.check -save_trx = Trx.save # not implemented +list_trx_varnames = asfun(Trx, Trx.list_varnames) +load_trx = load(Trx) +check_trx = check(Trx) +save_trx = save(Trx) # not implemented __all__ = [ 'Trx', diff --git a/larva/qc/file.py b/larva/qc/file.py index 1ce7cee8e1cbc697414decf76653a61625f9abab..916123fc407ed96db7c54799bc14c81f5d037d1f 100644 --- a/larva/qc/file.py +++ b/larva/qc/file.py @@ -3,12 +3,12 @@ from .check import QCChecks class QCFileBackend: - __slots__ = '_load', '_write', '_default_qc_checks' + __slots__ = '_load', '_save', '_default_qc_checks' - def __init__(self, load, write, default_qc_checks): + def __init__(self, load, save, default_qc_checks): if not callable(load): raise TypeError('load is not callable') - if not callable(save): + if not (callable(save) or save is NotImplemented): raise TypeError('save is not callable') self._load = load self._save = save @@ -76,11 +76,8 @@ class QCFile: raise AttributeError('not a list') @classmethod - def reify(cls, *args, filepath=None, copy=False, **kwargs): + def reify(cls, *args, filepath=None, **kwargs): """ - Switch from class method to regular method, suitable for - `load` and especially `check`. - The first positional argument, if of type *str* or *Path*, is interpreted as *filepath*, and a *TypeError* exception is raised if keyworded *filepath* is also defined. @@ -90,40 +87,18 @@ class QCFile: filepath = args[0] else: raise TypeError('filepath is specified twice') - if isinstance(cls, type): - self = cls(filepath=filepath) - elif copy: - self = cls - cls = type(self) - if filepath is None: - filepath = self.filepath - new = cls(filepath=filepath) - new.backend = self.backend - self = new - else: - self = cls - if filepath: - if self.filepath: - if filepath != self.filepath: - raise ValueError('filepath is already defined') - else: - self.filepath = filepath + self = cls(filepath=filepath) if not self.filepath: raise ValueError('filepath is not defined') return self - @classmethod - def load(cls, filepath=None, **kwargs): - self = cls.reify(filepath=filepath) - return self.backend.load(**kwargs) + def load(self, *args, **kwargs): + return self.backend.load(self.filepath, *args, **kwargs) - @classmethod - def check(cls, data=None, policy=None, **kwargs): - kwargs['copy'] = False - self = cls.reify(data, **kwargs) + def check(self, data=None, policy=None, **kwargs): # make data lazily available - if data is None or isinstance(data, (str, Path)): - data = self.load + if data is None: + data = self.load(**kwargs) # run the checks, fix the data and collect the diagnoses diagnoses = dict() for check in self.qc: @@ -134,59 +109,39 @@ class QCFile: # return data, diagnoses - @classmethod - def save(cls, *args, **kwargs): - may_be_path = lambda a: isinstance(a, (str, Path)) - may_be_data = lambda a: not may_be_path(a) - # parse the arguments. - # first positional argument can be: - # - filepath, or - # - data if filepath is defined as a keyword argument; - # second positional argument can only be data; - # a variable cannot be defined twice, be it - # a positional or keyword argument; - args = list(args) - if args and may_be_path(args[0]): - filepath = args.pop(0) - if 'filepath' in kwargs: - raise ValueError('filepath is defined twice') - else: - filepath = kwargs.pop('filepath', None) - if args and may_be_data(args[0]): - data = args.pop(0) - if 'data' in kwargs: - raise ValueError('data is defined twice') - else: - data = kwargs.pop('data', None) - if args: - raise ValueError(f'cannot interprete arguments: {args}') - # extract missing information from the parent object - # if save is called as a regular method - if isinstance(cls, type): - if data is None: - raise ValueError('data is not defined') - if not filepath: - raise ValueError('filepath is not defined') - output_file = cls.reify(filepath=filepath) - else: - self = cls - if filepath is None: - if data is None: - raise ValueError('neither filepath nor data are defined') - output_file = self.reify(filepath=filepath, copy=True) - else: - if data is None: - input_file = self - data = input_file.load() - else: - output_file = self - # save - output_file.backend.save(data, *args, **kwargs) + def save(self, data, *args, **kwargs): + self.backend.save(self.filepath, data, *args, **kwargs) def __getattr__(self, attr): return self.qc.__getattr__(attr) +def asfun(cls, met): + def _fun(filepath, *args, **kwargs): + self = cls(filepath) + return met(self, *args, **kwargs) + _fun.__doc__ = met.__doc__ + return _fun + +load = lambda cls: asfun(cls, cls.load) +save = lambda cls: asfun(cls, cls.save) + +def check(cls): + def _check(filepath_or_data, policy=None, **kwargs): + if isinstance(filepath_or_data, (str, Path)): + filepath = filepath_or_data + self = cls(filepath) + data = None + else: + self = cls() + data = filepath_or_data + return self.check(data, policy, **kwargs) + _check.__doc__ = cls.check.__doc__ + return _check + __all__ = [ 'QCFileBackend', 'QCFile', + 'load', + 'check', + 'save', ] diff --git a/setup.py b/setup.py index cee371929e2978cc5d1f517dd109d515b06bdd91..028346dd0779ccd86a7044b2a0064a09682481b1 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup from codecs import open import os.path -install_requires = ['h5py', 'scipy', 'numba'] +install_requires = ['h5py', 'hdf5storage'] extras_require = {} setup_requires = ['pytest-runner'] tests_require = ['pytest']