Skip to content
Snippets Groups Projects
Commit 2acf3cad authored by lswistak's avatar lswistak
Browse files

correct wording

parent aa629ca1
No related branches found
No related tags found
No related merge requests found
Pipeline #136292 passed
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Substack extraction # Substack extraction
This notebook is used to extract substacks from tomogram at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS geometry notebook) are used to This notebook is used to extract substacks from tomogram at the positions of the T3SS. Manually determined T3SS landmark coordinates (see the definition of 0, 1, 2 in the T3SS measurments notebook) are used to
- determine the centers of the substacks and - determine the centers of the substacks and
- and their orientations in 3D. - and their orientations in 3D.
The input to this notebook is an IMOD .mod file and the corresponding tomograms. The input to this notebook is an IMOD .mod file and the corresponding tomograms.
The output is a table containing the processed coordinates and the extracted substacks. The output is a table containing the processed coordinates and the extracted substacks.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Software environment ## Software environment
Use this notebook with a conda env: Use this notebook with a conda env:
- `conda create -n t3ss_geo python=3.10` - `conda create -n t3ss_geo python=3.10`
- `conda activate t3ss_geo` - `conda activate t3ss_geo`
- `pip install mrcfile pandas imodmodel ipython jupyter matplotlib seaborn ipympl scipy xarray` - `pip install mrcfile pandas imodmodel ipython jupyter matplotlib seaborn ipympl scipy xarray`
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import os import os
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import imodmodel import imodmodel
import mrcfile import mrcfile
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
base_dirs = [ base_dirs = [
'/Volumes/Eirene/Points/Points_corrected', '/Volumes/Eirene/Points/Points_corrected',
'/Volumes/Eirene/Points/20240502_Points', '/Volumes/Eirene/Points/20240502_Points',
] ]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Extract info from files # Extract info from files
dfs = [] dfs = []
def extract_ts_id_from_fn(fn): def extract_ts_id_from_fn(fn):
return int(fn.split('_')[2]) return int(fn.split('_')[2])
log_msgs = [] log_msgs = []
for base_dir in base_dirs: for base_dir in base_dirs:
for ds_dir in [d for d in os.listdir(base_dir) if d.startswith('0')]: for ds_dir in [d for d in os.listdir(base_dir) if d.startswith('0')]:
ds_path = os.path.join(base_dir, ds_dir) ds_path = os.path.join(base_dir, ds_dir)
print(ds_path) print(ds_path)
fns = [fn for fn in os.listdir(ds_path) if fn.startswith(ds_dir) and fn.endswith('.mrc')] fns = [fn for fn in os.listdir(ds_path) if fn.startswith(ds_dir) and fn.endswith('.mrc')]
for fn in fns: for fn in fns:
root_name = fn.split('rec_corrected.mrc')[0] root_name = fn.split('rec_corrected.mrc')[0]
t3ss_paths = [os.path.join(ds_path, f) for f in os.listdir(ds_path) if f.startswith(root_name) and f.endswith('T3SS.mod')] t3ss_paths = [os.path.join(ds_path, f) for f in os.listdir(ds_path) if f.startswith(root_name) and f.endswith('T3SS.mod')]
if not len(t3ss_paths): if not len(t3ss_paths):
msg = 'No T3SS model found for {}, {}'.format(fn, t3ss_path) msg = 'No T3SS model found for {}, {}'.format(fn, t3ss_path)
log_msgs.append(msg) log_msgs.append(msg)
print(msg) print(msg)
continue continue
t3ss_path = t3ss_paths[0] t3ss_path = t3ss_paths[0]
t3ss_name = os.path.basename(t3ss_path) t3ss_name = os.path.basename(t3ss_path)
tdf = imodmodel.read(t3ss_path) tdf = imodmodel.read(t3ss_path)
tdf['source_fn'] = t3ss_name tdf['source_fn'] = t3ss_name
tdf['type'] = 'T3SS' tdf['type'] = 'T3SS'
cdf = tdf cdf = tdf
cdf['tomo_id'] = extract_ts_id_from_fn(fn) cdf['tomo_id'] = extract_ts_id_from_fn(fn)
cdf['tomo_fn'] = os.path.join(ds_path, fn) cdf['tomo_fn'] = os.path.join(ds_path, fn)
cdf['ds'] = ds_dir cdf['ds'] = ds_dir
cdf['contour_id'] = cdf['contour_id'].astype(int) cdf['contour_id'] = cdf['contour_id'].astype(int)
cdf['object_id'] = cdf['object_id'].astype(int) cdf['object_id'] = cdf['object_id'].astype(int)
# multiply with voxel size and convert to nm # multiply with voxel size and convert to nm
voxel_size = mrcfile.mmap(os.path.join(base_dir, ds_dir, fn), mode='r+').voxel_size.x voxel_size = mrcfile.mmap(os.path.join(base_dir, ds_dir, fn), mode='r+').voxel_size.x
for dim in ['x', 'y', 'z']: for dim in ['x', 'y', 'z']:
cdf[dim+'_nm'] = cdf[dim] * voxel_size / 10 cdf[dim+'_nm'] = cdf[dim] * voxel_size / 10
cdf['voxel_size'] = voxel_size cdf['voxel_size'] = voxel_size
dfs.append(cdf) dfs.append(cdf)
df = pd.concat(dfs) df = pd.concat(dfs)
df df
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# keep only T3SS # keep only T3SS
df = df.set_index(['ds', 'tomo_id', 'object_id', 'contour_id'], inplace=False) df = df.set_index(['ds', 'tomo_id', 'object_id', 'contour_id'], inplace=False)
df = df[df['type'] == 'T3SS'] df = df[df['type'] == 'T3SS']
# keep only needles that have a contour 2 # keep only needles that have a contour 2
df['has_contour_2'] = df.groupby(['ds', 'tomo_id', 'object_id']).apply(lambda x: x.reset_index()['contour_id'].max() > 1) df['has_contour_2'] = df.groupby(['ds', 'tomo_id', 'object_id']).apply(lambda x: x.reset_index()['contour_id'].max() > 1)
df = df[df['has_contour_2'] == True] df = df[df['has_contour_2'] == True]
df = df.reset_index() df = df.reset_index()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
coord_cols = ['x', 'y', 'z'] coord_cols = ['x', 'y', 'z']
df = df.pivot_table(index=['ds', 'tomo_id', 'object_id', 'voxel_size', 'source_fn', 'tomo_fn'], columns='contour_id', values=coord_cols).reset_index() df = df.pivot_table(index=['ds', 'tomo_id', 'object_id', 'voxel_size', 'source_fn', 'tomo_fn'], columns='contour_id', values=coord_cols).reset_index()
for coord_col in coord_cols: for coord_col in coord_cols:
df[coord_col, '12'] = df[coord_col, 2] - df[coord_col, 1] df[coord_col, '12'] = df[coord_col, 2] - df[coord_col, 1]
df[coord_col, '10'] = df[coord_col, 0] - df[coord_col, 1] df[coord_col, '10'] = df[coord_col, 0] - df[coord_col, 1]
# Calculate the projection of the needle onto the transformed substacks # Calculate the projection of the needle onto the transformed substacks
coord_cols = ['x', 'y', 'z'] coord_cols = ['x', 'y', 'z']
v12 = np.array([df[cc, '12'] for cc in coord_cols]).T v12 = np.array([df[cc, '12'] for cc in coord_cols]).T
v10 = np.array([df[cc, '10'] for cc in coord_cols]).T v10 = np.array([df[cc, '10'] for cc in coord_cols]).T
v12_norm = (v12.T / np.linalg.norm(v12, axis=1)).T v12_norm = (v12.T / np.linalg.norm(v12, axis=1)).T
proj = np.array([np.eye(4)] * len(df)) proj = np.array([np.eye(4)] * len(df))
proj[:, :3, 1] = -v12_norm proj[:, :3, 1] = -v12_norm
proj[:, :3, 2] = np.cross(v12_norm, [1, 0, 0], axisa=1) proj[:, :3, 2] = np.cross(v12_norm, [1, 0, 0], axisa=1)
proj[:, :3, 0] = np.cross(proj[:, :3, 1], proj[:, :3, 2], axisa=1, axisb=1) proj[:, :3, 0] = np.cross(proj[:, :3, 1], proj[:, :3, 2], axisa=1, axisb=1)
v12_t = np.array([ v12_t = np.array([
np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v12) np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v12)
]) ])
v10_t = np.array([ v10_t = np.array([
np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v10) np.dot(np.linalg.inv(proj_el), np.concatenate([v, [0]]))[:3] for proj_el, v in zip(proj, v10)
]) ])
for icc, coord_col in enumerate(coord_cols): for icc, coord_col in enumerate(coord_cols):
df[coord_col, '12_t'] = v12_t[:, icc] df[coord_col, '12_t'] = v12_t[:, icc]
df[coord_col, '10_t'] = v10_t[:, icc] df[coord_col, '10_t'] = v10_t[:, icc]
df df
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# save coordinate table # save coordinate table
outdir = "/Volumes/Eirene/Points/extracted_images" outdir = "/Volumes/Eirene/Points/extracted_images"
df.to_csv(os.path.join(outdir, 'T3SS_coordinates.csv'), index=None) df.to_csv(os.path.join(outdir, 'T3SS_coordinates.csv'), index=None)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# produce substacks # produce substacks
from scipy import ndimage from scipy import ndimage
from skimage.transform import EuclideanTransform from skimage.transform import EuclideanTransform
for irow, (index, row) in enumerate(df.iterrows()): for irow, (index, row) in enumerate(df.iterrows()):
if irow != 1: if irow != 1:
continue continue
# im = np.array(mrcfile.read(row['tomo_fn','']).data) # im = np.array(mrcfile.read(row['tomo_fn','']).data)
pos = np.array([row[coord_col, 1] for coord_col in coord_cols[::-1]]) pos = np.array([row[coord_col, 1] for coord_col in coord_cols[::-1]])
n = np.array([row[coord_col, '12'] for coord_col in coord_cols[::-1]]) n = np.array([row[coord_col, '12'] for coord_col in coord_cols[::-1]])
v10 = np.array([row[coord_col, '10'] for coord_col in coord_cols[::-1]]) v10 = np.array([row[coord_col, '10'] for coord_col in coord_cols[::-1]])
voxel_size = row['voxel_size',''] voxel_size = row['voxel_size','']
# phys in nm # phys in nm
c_phys = pos c_phys = pos
R_phys = 150 R_phys = 150
n_phys = n * voxel_size / 10 n_phys = n * voxel_size / 10
# pixel coords # pixel coords
c = c_phys c = c_phys
R = R_phys * 10 / voxel_size R = R_phys * 10 / voxel_size
# normalize n # normalize n
n_norm = n / np.linalg.norm(n) n_norm = n / np.linalg.norm(n)
proj = np.eye(4) proj = np.eye(4)
proj[:3, 1] = -n_norm proj[:3, 1] = -n_norm
proj[:3, 2] = np.cross(n_norm, [1, 0, 0]) proj[:3, 2] = np.cross(n_norm, [1, 0, 0])
proj[:3, 0] = np.cross(proj[:3, 1], proj[:3, 2]) proj[:3, 0] = np.cross(proj[:3, 1], proj[:3, 2])
# output_shape = [int(2*R)] * 3 # output_shape = [int(2*R)] * 3
output_shape = [1] + [int(2*R)] * 2 output_shape = [1] + [int(2*R)] * 2
output_shape_3d = [int(2*R)] * 3 output_shape_3d = [int(2*R)] * 3
output_shape0 = [1] + [int(2*R)] * 2 output_shape0 = [1] + [int(2*R)] * 2
output_shape0_3d = [int(2*R)] * 3 output_shape0_3d = [int(2*R)] * 3
p = EuclideanTransform(translation=[c[0], c[1], c[2]]).params @ proj @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params p = EuclideanTransform(translation=[c[0], c[1], c[2]]).params @ proj @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params
p0 = EuclideanTransform(translation=c).params @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params p0 = EuclideanTransform(translation=c).params @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params
tims = [] tims = []
for t, d3 in zip( for t, d3 in zip(
[True] * 2 + [False] * 2, [True] * 2 + [False] * 2,
[False, True, False, True], [False, True, False, True],
): ):
if d3: if d3:
output_shape = [int(2*R)] * 3 output_shape = [int(2*R)] * 3
else: else:
output_shape = [1] + [int(2*R)] * 2 output_shape = [1] + [int(2*R)] * 2
if t: if t:
param = EuclideanTransform(translation=[c[0], c[1], c[2]]).params @ proj @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params param = EuclideanTransform(translation=[c[0], c[1], c[2]]).params @ proj @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params
else: else:
param = EuclideanTransform(translation=c).params @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params param = EuclideanTransform(translation=c).params @ EuclideanTransform(translation=[-s / 2. for s in output_shape]).params
tmp = ndimage.affine_transform(im, param, output_shape=output_shape, order=1) tmp = ndimage.affine_transform(im, param, output_shape=output_shape, order=1)
# mark center # mark center
tmp[tuple([s//2 for s in output_shape])] = 5 tmp[tuple([s//2 for s in output_shape])] = 5
if not t: if not t:
if d3: if d3:
tmp[tuple([int(s/2. + n[i]) for i, s in enumerate(output_shape)])] = 5 tmp[tuple([int(s/2. + n[i]) for i, s in enumerate(output_shape)])] = 5
print(tmp.shape) print(tmp.shape)
else: else:
if d3: if d3:
n_t = np.dot(np.linalg.inv(proj), np.concatenate([n, [0]]))[:3] n_t = np.dot(np.linalg.inv(proj), np.concatenate([n, [0]]))[:3]
tmp[tuple([int(s/2. + n_t[i]) for i, s in enumerate(output_shape)])] = 5 tmp[tuple([int(s/2. + n_t[i]) for i, s in enumerate(output_shape)])] = 5
v10_t = np.dot(np.linalg.inv(proj), np.concatenate([v10, [0]]))[:3] v10_t = np.dot(np.linalg.inv(proj), np.concatenate([v10, [0]]))[:3]
tmp[tuple([int(s/2. + v10_t[i]) for i, s in enumerate(output_shape)])] = 5 tmp[tuple([int(s/2. + v10_t[i]) for i, s in enumerate(output_shape)])] = 5
out_filename = f"{row.ds}_tomo_{row.tomo_id:02d}_id_{row.object_id:02d}_{['original', 'transformed'][int(t)]}_{['2D', '3D'][int(d3)]}.tif" out_filename = f"{row.ds}_tomo_{row.tomo_id:02d}_id_{row.object_id:02d}_{['original', 'transformed'][int(t)]}_{['2D', '3D'][int(d3)]}.tif"
if not os.path.exists(outdir): if not os.path.exists(outdir):
os.makedirs(outdir) os.makedirs(outdir)
import tifffile import tifffile
tifffile.imwrite(os.path.join(outdir, out_filename), tmp) tifffile.imwrite(os.path.join(outdir, out_filename), tmp)
tims.append(tmp) tims.append(tmp)
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment