Skip to content
Snippets Groups Projects
Commit 5205ca16 authored by Marvin Albert's avatar Marvin Albert
Browse files

Corrected geo notebook

parent dd3b6503
Branches
No related tags found
No related merge requests found
Pipeline #136419 passed
%% Cell type:markdown id: tags:
# T3SS measurments
This notebook is used to measure the measurments of T3SS from three manually determined: 0 (inner membrane/basal body interface ), 1 (basal body/needle junction or basal body/bacteria outer membrane junction for the basal body only) and 2 (needle tip or vacuole membrane/T3SS tip interface, not always present).
This notebook is used to perform measurments of T3SS from three manually determined: 0 (inner membrane/basal body interface ), 1 (basal body/needle junction or basal body/bacteria outer membrane junction for the basal body only) and 2 (needle tip or vacuole membrane/T3SS tip interface, not always present).
The input to this notebook is an IMOD .mod file and the corresponding tomograms.
The output is a table containing the measurements.
## Description
Measurements in nm and degrees
1) distance 1<->2
2) distance 2<->3 (only if 3 exists within the same object)
3) distances between all 2
Perform measurements for
- each dataset
- each T3SS
Output is a table containing
- dataset
- T3SS id
- measurements
%% Cell type:markdown id: tags:
## Software environment
Use this notebook with a conda env:
- `conda create -n t3ss_geo python=3.10`
- `conda activate t3ss_geo`
- `pip install mrcfile pandas imodmodel ipython jupyter matplotlib seaborn ipympl scipy xarray`
%% Cell type:code id: tags:
``` python
import os
import pandas as pd
import numpy as np
import imodmodel
import mrcfile
from scipy import spatial
import xarray as xr
```
%% Cell type:code id: tags:
``` python
base_dir = '/Volumes/Eirene/Points/Points_corrected'
output_dir = os.path.join(base_dir, 'Marvin_test', 'points_measurements')
ds_dirs = [d for d in os.listdir(base_dir) if d.startswith('0')]
if not os.path.exists(output_dir):
os.makedirs(output_dir)
```
%% Cell type:code id: tags:
``` python
# Extract info from files
dfs = []
def extract_ts_id_from_fn(fn):
return int(fn.split('_')[2])
log_msgs = []
for ds_dir in ds_dirs[:]:
ds_path = os.path.join(base_dir, ds_dir)
fns = [fn for fn in os.listdir(ds_path) if fn.startswith(ds_dir) and fn.endswith('.mrc')]
for fn in fns:
root_name = fn.split('rec_corrected.mrc')[0]
t3ss_name = root_name + 'T3SS.mod'
t3ss_path = os.path.join(base_dir, ds_dir, t3ss_name)
breaks_name = root_name + 'break.mod'
breaks_path = os.path.join(base_dir, ds_dir, breaks_name)
if not os.path.exists(t3ss_path):
msg = 'No T3SS model found for {}'.format(fn)
log_msgs.append(msg)
print(msg)
continue
tdf = imodmodel.read(t3ss_path)
tdf['source_fn'] = t3ss_name
tdf['type'] = 'T3SS'
cdf = tdf
cdf['tomo_id'] = extract_ts_id_from_fn(fn)
cdf['tomo_fn'] = fn
cdf['ds'] = ds_dir
cdf['contour_id'] = cdf['contour_id'].astype(int)
cdf['object_id'] = cdf['object_id'].astype(int)
# multiply with voxel size and convert to nm
voxel_size = mrcfile.mmap(os.path.join(base_dir, ds_dir, fn), mode='r+').voxel_size.x
for dim in ['x', 'y', 'z']:
cdf[dim] = cdf[dim] * voxel_size / 10
dfs.append(cdf)
df = pd.concat(dfs)
df
```
%% Cell type:code id: tags:
``` python
# Perform measurements
def measure(gdf):
"""
Measure the distance between two contours
"""
# get position of contours
tdf = gdf[gdf.type=='T3SS']
tdf = tdf.sort_values(by=['object_id', 'contour_id'])
positions = []
for contour_id in [0, 1, 2]:
if contour_id not in tdf.contour_id.values:
positions.append(np.array([np.nan, np.nan, np.nan]))
continue
x = tdf[tdf.contour_id==contour_id].x[0]
y = tdf[tdf.contour_id==contour_id].y[0]
z = tdf[tdf.contour_id==contour_id].z[0]
positions.append(np.array([x, y, z]))
# 1) distance 1<->2
d12 = np.linalg.norm(positions[0] - positions[1])
# 2) distance 2<->3 (only if 3 exists within the same object)
d23 = np.linalg.norm(positions[1] - positions[2])
ms = pd.DataFrame({
'distance_1_2': [d12],
'distance_2_3': [d23],
})
return ms
mdf = df.groupby(['ds', 'tomo_id', 'object_id']).apply(measure)
mdf.to_csv(os.path.join(output_dir, 't3ss_geometry.csv'))
open(os.path.join(output_dir, 't3ss_geometry.log'), 'w').write('\n'.join(log_msgs))
mdf
```
%% Cell type:code id: tags:
``` python
# 5) distances between 2
tdf = df[df.type=='T3SS']
for ds in np.unique(tdf.ds):
stdf = tdf[tdf.ds==ds]
for tomo_id in np.unique(stdf.tomo_id):
tstdf = stdf[stdf.tomo_id==tomo_id]
tstdf2 = tstdf[tstdf.contour_id==1]
tstdf2 = tstdf2.sort_values(by=['object_id'])
poss = np.array([tstdf2.x, tstdf2.y, tstdf2.z]).T
d = spatial.distance_matrix(poss, poss)
xd = xr.DataArray(d, dims=['object_id', 'object_id'], coords={'object_id': tstdf2.object_id})
xd.to_pandas().to_csv(os.path.join(output_dir, f"t3ss_distances_ds_{tstdf2['ds'].values[0]}_tomo-id_{tstdf2['tomo_id'].values[0]}.csv"))
```
%% Cell type:code id: tags:
``` python
import seaborn as sns
import matplotlib.pyplot as plt
pmdf = mdf.reset_index()
xcol = 'ds'
for ycol in ['distance_1_2', 'distance_2_3']:
plt.figure()
ax = sns.boxplot(data=pmdf, x='ds', y=ycol, fliersize=0)
ax2 = sns.stripplot(data=pmdf, x='ds', y=ycol, size=5)
nobs = pmdf[xcol].value_counts().values
pos = range(len(nobs))
labels = [ax.get_xticklabels()[i].get_text() for i in pos]
labels = [l + '\n(N=%s)'%nobs[i] for i, l in enumerate(labels)]
ax.set_xticks(pos)
ax.set_xticklabels(labels)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, 't3ss_geometry_{}.pdf'.format(ycol)), dpi=300)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment