correct wording

2acf3cad · lswistak · aa629ca1 · 2acf3cad
Commit 2acf3cad authored 1 year ago by lswistak
--- a/notebooks/substack_extraction.ipynb
+++ b/notebooks/substack_extraction.ipynb
@@ -6,7 +6,7 @@
   "source": [
    "# Substack extraction\n",
    "\n",
-    "This notebook is used to extract substacks from tomogram at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS geometry notebook) are used to\n",
+    "This notebook is used to extract substacks from tomograms at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS measurements notebook) are used to\n",
    "- determine the centers of the substacks and\n",
    "- their orientations in 3D.\n",
    "\n",

 %% Cell type:markdown id: tags:
 # Substack extraction
-This notebook is used to extract substacks from tomogram at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS geometry notebook) are used to
+This notebook is used to extract substacks from tomograms at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS measurements notebook) are used to
 - determine the centers of the substacks and
 - their orientations in 3D.
 The input to this notebook is an IMOD .mod file and the corresponding tomograms.
 The output is a table containing the processed coordinates and the extracted substacks.
 %% Cell type:markdown id: tags:
 ## Software environment
 Use this notebook with a conda env:
 - `conda create -n t3ss_geo python=3.10`
 - `conda activate t3ss_geo`
 - `pip install mrcfile pandas imodmodel ipython jupyter matplotlib seaborn ipympl scipy xarray scikit-image tifffile`
 %% Cell type:code id: tags:
 ``` python
 import os
 import pandas as pd
 import numpy as np
 import imodmodel
 import mrcfile
 ```
 %% Cell type:code id: tags:
 ``` python
 # Root directories that are scanned for dataset folders. The extraction cell
 # below lists each of these and visits every sub-directory whose name starts
 # with '0'.
 base_dirs = [
    '/Volumes/Eirene/Points/Points_corrected',
    '/Volumes/Eirene/Points/20240502_Points',
    ]
 ```
 %% Cell type:code id: tags:
 ``` python
 # Extract info from files
 # Accumulates one coordinate table per tomogram.
 dfs = []
 def extract_ts_id_from_fn(fn):
    """Return the third '_'-separated field of `fn` converted to int.

    Raises IndexError if `fn` has fewer than three fields and ValueError if
    the third field is not an integer literal.
    """
    fields = fn.split('_')
    return int(fields[2])
 # Messages about tomograms that had to be skipped (also printed as they occur).
 log_msgs = []
 for base_dir in base_dirs:
    for ds_dir in [d for d in os.listdir(base_dir) if d.startswith('0')]:
        ds_path = os.path.join(base_dir, ds_dir)
        print(ds_path)
        # List the dataset folder once and reuse it for both file lookups.
        ds_files = os.listdir(ds_path)
        fns = [fn for fn in ds_files if fn.startswith(ds_dir) and fn.endswith('.mrc')]
        for fn in fns:
            # The model file shares the tomogram's name up to 'rec_corrected.mrc'.
            root_name = fn.split('rec_corrected.mrc')[0]
            t3ss_paths = [os.path.join(ds_path, f) for f in ds_files if f.startswith(root_name) and f.endswith('T3SS.mod')]
            if not t3ss_paths:
                # Report the folder that was searched; no model path exists yet
                # at this point, so it cannot be part of the message.
                msg = 'No T3SS model found for {}, {}'.format(fn, ds_path)
                log_msgs.append(msg)
                print(msg)
                continue
            # If several models match, the first one in listing order is used.
            t3ss_path = t3ss_paths[0]
            tomo_path = os.path.join(ds_path, fn)
            cdf = imodmodel.read(t3ss_path)
            cdf['source_fn'] = os.path.basename(t3ss_path)
            cdf['type'] = 'T3SS'
            cdf['tomo_id'] = extract_ts_id_from_fn(fn)
            cdf['tomo_fn'] = tomo_path
            cdf['ds'] = ds_dir
            cdf['contour_id'] = cdf['contour_id'].astype(int)
            cdf['object_id'] = cdf['object_id'].astype(int)
            # Only the header is needed, so open read-only and close the file
            # again right away instead of leaving a writable memory map open.
            with mrcfile.mmap(tomo_path, mode='r') as mrc:
                voxel_size = mrc.voxel_size.x
            # multiply with voxel size and convert to nm
            # (mrcfile reports the voxel size in angstroms, hence the / 10)
            for dim in ['x', 'y', 'z']:
                cdf[dim+'_nm'] = cdf[dim] * voxel_size / 10
            cdf['voxel_size'] = voxel_size
            dfs.append(cdf)
 # One row per model point; pd.concat raises ValueError if nothing was found.
 df = pd.concat(dfs)
 df
 ```
 %% Cell type:code id: tags:
 ``` python
 # keep only T3SS
 needle_keys = ['ds', 'tomo_id', 'object_id']
 df = df.set_index(needle_keys + ['contour_id'])
 is_t3ss = df['type'] == 'T3SS'
 df = df[is_t3ss]
 # keep only needles that have a contour 2
 # (one boolean per needle: True when its largest contour id exceeds 1)
 contour_2_present = df.groupby(needle_keys).apply(
    lambda grp: grp.reset_index()['contour_id'].max() > 1
 )
 df['has_contour_2'] = contour_2_present
 df = df[df['has_contour_2'] == True].reset_index()
 ```
 %% Cell type:code id: tags:
 ``` python
 # Reshape the point table to one row per needle and derive the landmark
 # vectors plus their coordinates in a needle-aligned basis.
 coord_cols = ['x', 'y', 'z']
 # After the pivot the columns are two-level: (coordinate, contour_id), with
 # the former index columns carrying an empty second level after reset_index.
 # NOTE(review): pivot_table aggregates with its default (mean), so several
 # points in one contour would be averaged -- confirm each contour holds one point.
 df = df.pivot_table(index=['ds', 'tomo_id', 'object_id', 'voxel_size', 'source_fn', 'tomo_fn'], columns='contour_id', values=coord_cols).reset_index()
 for coord_col in coord_cols:
    # '12': vector from landmark 1 to landmark 2; '10': from landmark 1 to landmark 0.
    df[coord_col, '12'] = df[coord_col, 2] - df[coord_col, 1]
    df[coord_col, '10'] = df[coord_col, 0] - df[coord_col, 1]
 # Calculate the projection of the needle onto the transformed substacks
 coord_cols = ['x', 'y', 'z']
 # Stack the per-coordinate columns into (n_rows, 3) arrays in x, y, z order.
 v12 = np.array([df[cc, '12'] for cc in coord_cols]).T
 v10 = np.array([df[cc, '10'] for cc in coord_cols]).T
 # Unit vector along 1 -> 2 for every row (a zero-length vector yields NaN/inf here).
 v12_norm = (v12.T / np.linalg.norm(v12, axis=1)).T
 # One 4x4 homogeneous matrix per row, starting from the identity.
 proj = np.array([np.eye(4)] * len(df))
 # Basis column 1 points against the 1 -> 2 direction.
 proj[:, :3, 1] = -v12_norm
 # Basis column 2 is perpendicular to the needle and to the first coordinate axis.
 # NOTE(review): this cross product is not re-normalised, so its length is
 # |sin(angle between the needle and [1, 0, 0])|; columns 0 and 2 are therefore
 # orthogonal but generally not unit length, and the matrix is singular when the
 # needle is parallel to [1, 0, 0] -- confirm whether a pure rotation was intended.
 proj[:, :3, 2] = np.cross(v12_norm, [1, 0, 0], axisa=1)
 proj[:, :3, 0] = np.cross(proj[:, :3, 1], proj[:, :3, 2], axisa=1, axisb=1)
 # Express both vectors in the new basis by applying the inverse matrix. The
 # appended 0 makes them homogeneous directions, so any translation is ignored.
 v12_t = np.array([
    np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v12)
 ])
 v10_t = np.array([
    np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v10)
 ])
 # Store the transformed components next to the original ones.
 for icc, coord_col in enumerate(coord_cols):
    df[coord_col, '12_t'] = v12_t[:, icc]
    df[coord_col, '10_t'] = v10_t[:, icc]
 df
 ```
 %% Cell type:code id: tags:
 ``` python
 # save coordinate table
 outdir = "/Volumes/Eirene/Points/extracted_images"
 # Create the output folder first: to_csv does not create missing directories,
 # and the folder is otherwise only created later by the substack cell.
 os.makedirs(outdir, exist_ok=True)
 df.to_csv(os.path.join(outdir, 'T3SS_coordinates.csv'), index=False)
 ```
 %% Cell type:code id: tags:
 ``` python
 # produce substacks
 # For each selected needle, resample the tomogram around landmark 1 and write
 # four TIFFs: {transformed, original} x {2D slice, 3D cube}.
 from scipy import ndimage
 from skimage.transform import EuclideanTransform
 import tifffile
 # Half-width of the extracted region in nm (converted to pixels per tomogram).
 radius_nm = 150
 # Positional row numbers of the coordinate table to process. The notebook
 # originally handled row 1 only; set to None to process every row.
 rows_to_process = [1]
 # Value written into marker voxels (substack centre and landmark offsets).
 marker_value = 5
 def _translation(offset):
    """Return the 4x4 homogeneous matrix that translates by `offset`."""
    return EuclideanTransform(translation=list(offset)).params
 def _mark(volume, index):
    """Write the marker value at `index`, skipping indices outside `volume`."""
    if all(0 <= i < s for i, s in zip(index, volume.shape)):
        volume[tuple(index)] = marker_value
 os.makedirs(outdir, exist_ok=True)
 for irow, (index, row) in enumerate(df.iterrows()):
    if rows_to_process is not None and irow not in rows_to_process:
        continue
    # The table has two-level column labels after the pivot, so identifier
    # columns are addressed with their full (name, '') key.
    ds = row['ds', '']
    tomo_id = int(row['tomo_id', ''])
    object_id = int(row['object_id', ''])
    voxel_size = row['voxel_size', '']
    # Load the tomogram this needle belongs to.
    im = mrcfile.read(row['tomo_fn', ''])
    # Landmark 1 and the two landmark vectors, reversed to (z, y, x) order.
    # NOTE(review): the basis saved with the coordinate table is built in
    # (x, y, z) order against the same [1, 0, 0] reference, so the two bases
    # differ -- confirm which one is intended.
    pos = np.array([row[coord_col, 1] for coord_col in coord_cols[::-1]])
    n = np.array([row[coord_col, '12'] for coord_col in coord_cols[::-1]])
    v10 = np.array([row[coord_col, '10'] for coord_col in coord_cols[::-1]])
    # Half-width in pixels (voxel_size is divided by 10 to go from angstroms to nm).
    R = radius_nm * 10 / voxel_size
    side = int(2*R)
    # Needle-aligned basis: column 1 points against the 1 -> 2 direction.
    n_norm = n / np.linalg.norm(n)
    proj = np.eye(4)
    proj[:3, 1] = -n_norm
    proj[:3, 2] = np.cross(n_norm, [1, 0, 0])
    proj[:3, 0] = np.cross(proj[:3, 1], proj[:3, 2])
    proj_inv = np.linalg.inv(proj)
    # Holds the four renderings of the current row only.
    tims = []
    for t, d3 in [(True, False), (True, True), (False, False), (False, True)]:
        output_shape = [side] * 3 if d3 else [1, side, side]
        # Output index -> input index: centre the output grid, optionally
        # apply the needle basis, then move to landmark 1.
        to_centre = _translation([-s / 2. for s in output_shape])
        if t:
            param = _translation(pos) @ proj @ to_centre
        else:
            param = _translation(pos) @ to_centre
        tmp = ndimage.affine_transform(im, param, output_shape=output_shape, order=1)
        # mark center
        _mark(tmp, [s//2 for s in output_shape])
        if d3:
            # Mark the landmark offsets relative to the centre. In the
            # transformed cube they are first expressed in the needle basis.
            if t:
                offsets = [np.dot(proj_inv, np.concatenate([v, [0]]))[:3] for v in (n, v10)]
            else:
                offsets = [n]
            for offset in offsets:
                _mark(tmp, [int(s/2. + offset[i]) for i, s in enumerate(output_shape)])
        if not t:
            print(tmp.shape)
        kind = ['original', 'transformed'][int(t)]
        dims = ['2D', '3D'][int(d3)]
        out_filename = f"{ds}_tomo_{tomo_id:02d}_id_{object_id:02d}_{kind}_{dims}.tif"
        tifffile.imwrite(os.path.join(outdir, out_filename), tmp)
        tims.append(tmp)
 ```