diff --git a/README.md b/README.md index f02fac668287786a1d181f2289bebb1ac8acbd6d..05dc1ac1227e6d76bba0cf76c47966969504e416 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ A step is a processing stage. It takes an input 0 to virtually unlimited inputs A pipe is a collection of Steps. In real life situations, it is very often that the same data structure can be having several Steps that add some data to it. For example, imaging data can be obtained, and segmented into tables of neurons fluorescence over time. As the execution goes further in the processing pipeline, some of the processing steps may calculate the responsivness of the neurons to one or another type of stimulation, obtained in another step. It then seems logical to only add this responsiveness information to the tables data, to not overcrowd the disk with duplicated stages with increasingly detailed/further processed data. The pipe serves this purpose. When a given ``Step`` requires the output of another ``Step``, it looks up wich ``Pipe`` (e.g. general data structure) it is attached to, and loads the most advanced one available in that ``Pipe``. As such, it means a ``Step`` output at level 6 of a ``Pipe``, has to also be valid for the data that it holds, to a ``Step`` that requires the level 1 of this ``Pipe``. (meaning : as a rule of thumb, don't delete data from a ``Pipe`` output with increasing steps, but only add new fields/columns to it.) Below, is the example of a full ``Pipeline`` graph : - + On the same column, the dots represents the different Steps of a ``Pipe``. A ``Pipe`` is then a single column on this graph. Links between steps represent the dependancies between them. @@ -47,13 +47,16 @@ The most usefull parts that this package allows you do to is : To implement a Pipeline, you have to define at least a Step, attached inside a Pipe. Here is a simple example. -You can test this example, and see the result on your own computer. 
(you require the two csv files located in the ``test/data`` folder of this repository) +You can test this example, and see the result on your own computer. (The two CSV files located in the ``tests/data`` folder of this repository are read directly from GitHub for demonstration purposes.) ```python from pypelines import Pipeline, BasePipe, BaseStep, Session, pickle_backend from pathlib import Path import pandas, numpy, json +ROIS_URL = "https://raw.githubusercontent.com/JostTim/pypelines/refs/heads/main/tests/data/rois_df.csv" +TRIALS_URL = "https://raw.githubusercontent.com/JostTim/pypelines/refs/heads/main/tests/data/trials_df.csv" + pipeline = Pipeline("my_neurophy_pipeline") @pipeline.register_pipe @@ -65,7 +68,7 @@ class ROIsTablePipe(BasePipe): step_name = "read" def worker(self, session, extra=""): - rois_data = pandas.read_csv("rois_df.csv").set_index("roi#") + rois_data = pandas.read_csv(ROIS_URL).set_index("roi#") rois_data["F_norm"] = rois_data["F_norm"].apply(json.loads) return rois_data @@ -78,7 +81,7 @@ class TrialsTablePipe(BasePipe): step_name = "read" def worker(self, session, extra = ""): - trials_data = pandas.read_csv("trials_df.csv").set_index("trial#") + trials_data = pandas.read_csv(TRIALS_URL).set_index("trial#") return trials_data class AddFrameTimes(BaseStep): diff --git a/src/pypelines/__init__.py b/src/pypelines/__init__.py index 859495c09a066ac3ff37eab59d5fb2fe6042d26a..0fbab59a1040bd8ac619ef186e7d1037bf8124bf 100644 --- a/src/pypelines/__init__.py +++ b/src/pypelines/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.83" +__version__ = "0.0.84" from . import loggs from .pipes import *