synthetic_data.py 12.6 KB
Newer Older
amichaut's avatar
amichaut committed
1
2
3
4
5
##########################################################################
# Track Analyzer - Quantification and visualization of tracking data     #
# Authors: Arthur Michaut                                                #
# Copyright 2016-2019 Harvard Medical School and Brigham and             #
#                          Women's Hospital                              #
6
# Copyright 2019-2022 Institut Pasteur and CNRS–UMR3738                  #
amichaut's avatar
amichaut committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# See the COPYRIGHT file for details                                     #
#                                                                        #
# This file is part of Track Analyzer package.                           #
#                                                                        #
# Track Analyzer is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by   #
# the Free Software Foundation, either version 3 of the License, or      #
# (at your option) any later version.                                    #
#                                                                        #
# Track Analyzer is distributed in the hope that it will be useful,      #
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           #
# GNU General Public License for more details .                          #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with Track Analyzer (COPYING).                                   #
# If not, see <https://www.gnu.org/licenses/>.                           #
##########################################################################

amichaut's avatar
amichaut committed
26
27
import os.path as osp

amichaut's avatar
amichaut committed
28
import matplotlib.pyplot as plt
29
import matplotlib as mpl
amichaut's avatar
amichaut committed
30
import numpy as np
amichaut's avatar
amichaut committed
31
import pandas as pd
32
from skimage.color import rgb2gray
amichaut's avatar
amichaut committed
33
34
35
36
from skimage.util import img_as_ubyte
import seaborn as sns
import tifffile as tifff

37
38
from track_analyzer import prepare as tpr

amichaut's avatar
amichaut committed
39
# Plotting parameters
40
41
color_list = [c['color'] for c in list(plt.rcParams['axes.prop_cycle'])] + sns.color_palette("Set1", n_colors=9,
                                                                                             desat=.5)
amichaut's avatar
amichaut committed
42

43
mpl.use('agg')
amichaut's avatar
amichaut committed
44

45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# parameters
traj_num = 500  # number of tracks
frame_num = 5  # number of frame
dim = 2  # dimension
lims = [[0, 500], [0, 500]]  # space limits
periodic = False  # space periodicity
random = False  # initial position spacing
space = {'dim': dim, 'lims': lims, 'periodic': periodic, 'random': random}
pos0_lims = [[100, 400], [100, 400]]  # initial position limits
diff_field = {'cst': 1}  # random motion amplitude
vel_field = [{'a': 0.01, 'p': 2, 'b': 250, 'c': -0.01, 'q': 2, 'd': 250},  # vx = x^2 - y^2 (centered)
             {'a': 0.01, 'p': 2, 'b': 250, 'c': -0.01, 'q': 2, 'd': 250},  # vy = x^2 - y^2 (centered)
             ]
timescale = 1
lengthscale = 1


def init_config(traj_num=traj_num, frame_num=frame_num, space=space, pos0_lims=pos0_lims, diff_field=diff_field,
                vel_field=vel_field, timescale=timescale, lengthscale=lengthscale):
    """
    Make config_default
    """
    config_default = {'traj_num': traj_num,
                      'frame_num': frame_num,
                      'space': space,
                      'pos0_lims': pos0_lims,
                      'diff_field': diff_field,
                      'vel_field': vel_field,
                      'lengthscale': lengthscale,
                      'timescale': timescale,
                      }
    return config_default


def get_config(data_dir):
    """
    Load from config file 'config.csv' if it exists, else return None
    """

    config_fn = osp.join(data_dir, 'config.csv')
    if osp.exists(config_fn):
        config = tpr.load_dict(config_fn)
    else:
        config = None

    return config


def make_pos0(pos0_lims=pos0_lims, traj_num=traj_num, random=True):
    """
    Generate list of initial positions at random position within boudaries given by pos0_lims 
    """
    pos0_list = []

    if random:
        for i in range(traj_num):
            pos0 = np.array([np.random.uniform(pos0_lims_[0], pos0_lims_[1]) for pos0_lims_ in pos0_lims])
            pos0_list.append(pos0)
    else:
        # create a rectangle array of N_ positions evenly spaced 
        # height/width = N_h/N_w and N_h*N_w = N_ 
        width = np.abs(pos0_lims[0][1] - pos0_lims[0][0])
        height = np.abs(pos0_lims[1][1] - pos0_lims[1][0])
        N_ = int(np.sqrt(traj_num)) ** 2  # number of positions needs to be a square number
        N_h = int(np.sqrt(N_) * height / width)
        N_w = int(N_ / N_h)

        # meshgrid
        X, Y = np.meshgrid(np.linspace(pos0_lims[0][0], pos0_lims[0][1], N_w),
                           np.linspace(pos0_lims[1][0], pos0_lims[1][1], N_h))
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                pos0_list.append(np.array([X[i, j], Y[i, j]]))

    return pos0_list


def polynom_mapping(pos, params={'cst': 0}, space=space):
    """
    Evaluate polynomial at position pos.
    Polynomial function: (a*(x-b))^p + (c*(y-d))^q + (e*(z-f))^r + cst
    Useful to generate a velocity field with polynomial expression
    """

    # put parameter to either if missing
    for p in list('abcdefpqr') + ['cst']:
        if p not in params.keys():
            params[p] = 0

    # check dimension of position compatible with space dimension
    pos_dim = len(pos)
    if pos_dim > space['dim']:
        raise Exception('position dimension larger than space dimension')

    value = params['cst']

    if pos_dim == 1:
        value += (params['a'] * (pos[0] - params['b'])) ** params['p']

    elif pos_dim == 2:
        value += (params['a'] * (pos[0] - params['b'])) ** params['p']
        value += (params['c'] * (pos[1] - params['d'])) ** params['q']

    elif pos_dim == 3:
        value += (params['a'] * (pos[0] - params['b'])) ** params['p']
        value += (params['c'] * (pos[1] - params['d'])) ** params['q']
        value += (params['e'] * (pos[2] - params['f'])) ** params['r']

    return value


def n_polynom_mapping(pos, params_list=[{'cst': 0}], space=space):
    """
    n-dimension polynomial evaluation at position pos using polynom_mapping()
    """

    # check there is a set of parameters along each space dimension
    if len(params_list) != space['dim']:
        raise Exception('params_list must have the same size as space dimension')

    vector = []
    for i in range(space['dim']):
        vector.append(polynom_mapping(pos, params=params_list[i], space=space))

    return vector


def export_vel_fields(vel_field, pos_list, data_dir, space=space):
    """
    Export velocity curl and div fields evaluated at an list of positions.
    vel_field (generated by n_polynom_mapping) is a list of n elements (n=2 or 3), each element being a spatial component
    """

    # check 2D velocity field is 2D
    if len(vel_field) < 2:
        raise Exception("Need a 2D velocity field")

    # check missing parameters in vel_field
    for i in range(len(vel_field)):
        for p in list('abcdefpqr'):
            if p not in vel_field[i].keys():
                vel_field[i][p] = 0

    # get velocity list
    vel_list = []
    for pos in pos_list:
        vel_list.append(n_polynom_mapping(pos, params_list=vel_field, space=space))

    # calculate 2D curl and div
    curl_list = []
    div_list = []
    for pos in pos_list:
        # curl
        curl = dx_polynom(vel_field[1], pos) - dy_polynom(vel_field[0], pos)  # Dx_vy - Dy_vx
        curl_list.append(curl)
        # div
        div = dx_polynom(vel_field[0], pos) + dy_polynom(vel_field[1], pos)  # Dx_vx + Dy_vy
        div_list.append(div)

    # save to df 
    data = np.concatenate((np.array(pos_list), np.array(vel_list), np.array([curl_list]).T, np.array([div_list]).T),
                          axis=1)

    columns = ['x', 'y', 'vx', 'vy', 'vz', 'curl', 'div'] if len(vel_field) == 3 else ['x', 'y', 'vx', 'vy', 'curl',
                                                                                       'div']
    df_out = pd.DataFrame(data, columns=columns)
    df_out.to_csv(osp.join(data_dir, 'v_fields.csv'))


def export_diff_field(diff_field, pos_list, data_dir, space=space, timescale=timescale):
    """
    Export diffusion coefficient field evaluated at an list of positions.
    diff_field (generated by polynom_mapping) is the noise_amplitude field. diffusion = noise_amp ** 2 / (2. * dim * dt)
    """

    # get velocity list
    D_list = []
    for pos in pos_list:
        diff_ampl = polynom_mapping(pos, diff_field, space)
        D_list.append(diff_ampl ** 2 / (2. * space['dim'] * timescale))

    # save to df 
    data = np.concatenate((np.array(pos_list), np.array([D_list]).T), axis=1)

    df_out = pd.DataFrame(data, columns=['x', 'y', 'D'])
    df_out.to_csv(osp.join(data_dir, 'D_fields.csv'))


def dx_polynom(polynom, pos):
    """
    x-component of spatial derivative of polynom_mapping at position pos
    """
    if polynom['p'] > 0:
        value = polynom['a'] * polynom['p'] * (pos[0] - polynom['b']) ** (polynom['p'] - 1)
    else:
        value = 0

    return value


def dy_polynom(polynom, pos):
    """
    y-component of spatial derivative of polynom_mapping at position pos
    """
    if polynom['q'] > 0:
        value = polynom['c'] * polynom['q'] * (pos[1] - polynom['d']) ** (polynom['q'] - 1)
    else:
        value = 0

    return value


def make_traj(pos0, frame_num=frame_num, diff_field=diff_field, vel_field=vel_field, space=space, track_id=0,
              timescale=timescale):
    """
    Generate a trajectory of frame_num positions from a an initial position pos0. 
    At each frame, the displacement is given by the superposition of a diffusive component and ballistic component.
    Each component are computed at the position using the diffusion filed diff_field and the velocity field vel_field.
    """

    traj = pd.DataFrame(columns=['track', 'frame'] + list('xyz')[:space['dim']])

    pos = pos0  # intialize position
    traj.loc[0, :] = [track_id, 0] + list(pos0)

    for frame in range(1, frame_num):  # first position already given

        # diffusive component
        diff = np.random.randn(space['dim'])
        diff_2 = diff ** 2
        diff_normalized = diff / np.sqrt(diff_2.sum())
        diff_ampl = polynom_mapping(pos, diff_field, space)  # diffusion amplitude at pos

        # ballistic component
        vel = n_polynom_mapping(pos, params_list=vel_field, space=space)  # velocity at pos

        # new position
        pos += vel * timescale + diff_ampl * diff_normalized  # pos += v*dt + noise_amplitude

amichaut's avatar
amichaut committed
284
        if periodic:
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
            for i in range(len(pos)):
                pos[i] = np.remainder(pos[i], space['lims'][i])

        traj.loc[frame, :] = [track_id, frame] + list(pos)

    return traj


def make_dataset(pos0_list, frame_num=frame_num, diff_field=diff_field, vel_field=vel_field, space=space,
                 timescale=timescale, lengthscale=lengthscale):
    """
    Make set of trajectories using make_traj
    """

    dimensions = list('xyz')[:space['dim']]
    df = pd.DataFrame(columns=['track', 'frame'] + dimensions)

    for i, pos0 in enumerate(pos0_list):
        traj = make_traj(pos0, frame_num=frame_num, diff_field=diff_field, vel_field=vel_field, space=space, track_id=i,
                         timescale=timescale)

        df = pd.concat([df, traj], ignore_index=True)

    for d in dimensions:
        df[d] *= lengthscale

    return df


def plot_synthetic_stack(df, outdir, dpi=300, image_size=[500, 500], frame_num=10):
amichaut's avatar
amichaut committed
315
316
    """Plot synthetic data and save it as a grayscaled tiff stack"""

317
    stack = []  # stack to store images
318
    groups = df.groupby('frame')
319

320
    # plot frames
321
    for i in range(frame_num):
322
        group = groups.get_group(i).reset_index(drop=True)
323

324
        figsize = (image_size[0] / dpi, image_size[1] / dpi)
325
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
amichaut's avatar
amichaut committed
326
327
        ax = fig.add_axes([0, 0, 1, 1])
        for k in range(group.shape[0]):
328
329
330
            ax.scatter(group.loc[k, 'x'], group.loc[k, 'y'], s=3)
        ax.set_xlim(0, image_size[0])
        ax.set_ylim(0, image_size[1])
amichaut's avatar
amichaut committed
331
332
        ax.invert_yaxis()
        ax.axis('off')
333

334
335
336
337
338
339
340
341
342
343
        fig.canvas.draw()
        fig_image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        fig_image = fig_image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
        fig_image = rgb2gray(fig_image)  # convert to grayscale
        fig_image = img_as_ubyte(fig_image)  # convert to 8 bit
        stack.append(fig_image)
        plt.close(fig)

    # save
    stack = np.array(stack)
344
    tifff.imwrite(osp.join(outdir, 'stack.tiff'), stack)