Commit 0478e14a authored by Dmitry  ERSHOV's avatar Dmitry ERSHOV
Browse files

Use a new (icy generated) training set.

parent 982a9a5b
This diff is collapsed.
This diff is collapsed.
{"prob": 0.46988391876220703, "nms": 0.4}
\ No newline at end of file
{"prob": 0.47588062286376953, "nms": 0.4}
\ No newline at end of file
%% Cell type:markdown id: tags:
### This notebook generates training data (images and label masks) from the original tif movies and their xml annotations
%% Cell type:code id: tags:
``` python
import os, sys
import xml.etree.ElementTree as ET
import pandas as pd
import tifffile
import matplotlib.pyplot as plt
import numpy as np
from skimage.morphology import dilation, selem
import glob
# Project layout: the notebook lives one level below the project root, the data in <root>/data.
ROOT_DIR = os.path.abspath( '..' )
DATA_DIR = os.path.join( ROOT_DIR, 'data' )

# Data set selection. Exactly one RAW/GEN pair must be active; the alternatives are kept
# commented out so that switching data sets is a matter of (un)commenting two lines.
# DATA_RAW_DIR = os.path.join( DATA_DIR, '1_raw_movies_and_xmls' )
# DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated' )
# DATA_RAW_DIR = os.path.join( DATA_DIR, '1_official_test_set' )
# DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated_from_official_set' )
DATA_RAW_DIR = os.path.join( DATA_DIR, '1_icy-benchmark-training_set' )
DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated_from_icy_benchmark_set' )

# Output folders: one file per frame is written to each of them.
IMAGES_DIR = os.path.join(DATA_GEN_DIR, 'images' )
MASKS_DIR = os.path.join(DATA_GEN_DIR, 'masks' )

for f in [IMAGES_DIR, MASKS_DIR]:
    if not os.path.exists(f):
        # exist_ok guards against the folder appearing between the check and the call
        os.makedirs(f, exist_ok=True)
        print('Created dir: %s'%f)
```
%% Cell type:code id: tags:
``` python
# lll = os.path.join(DATA_DIR, '2_generated_from_icy_benchmark_set/images' )
# lll = os.listdir( lll )
# lll = set( [l.split(' ')[0] for l in lll] )
# lll
# lll2 = os.path.join(DATA_DIR, '2_generated_from_official_set/images' )
# lll2 = os.listdir( lll2 )
# lll2 = set( [l.split(' ')[0] for l in lll2] )
# lll, lll2
```
%% Output
({'MICROTUBULE', 'RECEPTOR', 'VESICLE', 'VIRUS'},
{'MICROTUBULE', 'RECEPTOR', 'VESICLE'})
%% Cell type:code id: tags:
``` python
def read_xml_as_pands( xml_abs_path ):
    """
    Parse a particle-track xml file into a flat table of detections.

    Input
    -----
    xml_abs_path : str
        absolute path to xml file

    Return
    ------
    df : pandas.DataFrame
        One row per detection with the columns ['pid', 'fr', 'x', 'y', 'z'].
        'pid' is the position of the particle inside the first child of the xml root,
        'fr' is the detection's 't' attribute and 'x', 'y', 'z' are its coordinates.
        All columns are numeric.
    """
    # the particles are the children of the first element below the root
    particles = ET.parse( xml_abs_path ).getroot()[0]

    # Gather plain tuples first and build the DataFrame once at the end;
    # growing a DataFrame inside the loop would be much slower.
    records = []
    for pid, particle in enumerate(particles):
        records.extend(
            (pid, det.attrib['t'], det.attrib['x'], det.attrib['y'], det.attrib['z'])
            for det in particle
        )

    columns = ('pid', 'fr', 'x', 'y', 'z')
    df = pd.DataFrame(data={name: [rec[k] for rec in records] for k, name in enumerate(columns)})

    # the xml attributes are strings; convert every column to the smallest fitting numeric type
    for name in ('x', 'y', 'z'):
        df[name] = pd.to_numeric(df[name], downcast='float')
    for name in ('fr', 'pid'):
        df[name] = pd.to_numeric(df[name], downcast='integer')
    return df
def df_to_lbl( img, df, radius, label_offset=1 ):
    """
    Convert a DataFrame of particle detections into a stack of label images.

    Input
    -----
    img : numpy.ndarray
        Array representing time-lapse; img.shape = (frame, y, x). Only its shape is used.
    df : pandas.DataFrame
        df = ['pid', 'fr', 'x', 'y', 'z']; 'z' is ignored.
    radius : float
        radius of the particle label. Each particle is represented by a disk of this radius.
    label_offset : int
        Value added to 'pid' to obtain the label value. The default of 1 keeps the
        particle with pid 0 distinguishable from the background (0); pass 0 to get
        label == pid (particle 0 is then not drawn).

    Return
    ------
    img_lbl : numpy.ndarray
        uint16 array, img_lbl.shape = img.shape = (frame, y, x). Background is 0,
        the particle with id `pid` is drawn with the value `pid + label_offset`.
        Where disks overlap the larger label value wins (grey-value dilation).
        Detections that fall outside of the image or the frame range are skipped.

    Raises
    ------
    ValueError
        If the largest label value does not fit into an unsigned 16 bit image.
    """
    frames_n, rows, cols = img.shape
    img_lbl = np.zeros((frames_n, rows, cols), dtype=np.uint16)
    if len(df) == 0:
        # nothing to draw; also avoids max() of an empty column (NaN)
        return img_lbl

    id_max = int(df['pid'].max())
    if id_max + label_offset > np.iinfo(np.uint16).max:
        raise ValueError("Label image is coded to be unsigned 16bit; you have %d particles."%id_max)

    xs = np.asarray(df['x'], dtype=float)
    ys = np.asarray(df['y'], dtype=float)
    frs = np.asarray(df['fr'], dtype=float)
    lbls = np.asarray(df['pid'], dtype=np.int64) + label_offset

    # drop detections without usable coordinates before casting to integer indices
    finite = np.isfinite(xs) & np.isfinite(ys) & np.isfinite(frs)
    xs, ys, frs, lbls = xs[finite], ys[finite], frs[finite], lbls[finite]

    c = np.round(xs).astype(np.int64)
    r = np.round(ys).astype(np.int64)
    fr = frs.astype(np.int64)

    # Keep only detections inside the stack: a negative index would silently wrap around
    # to the opposite border and an index == size would raise IndexError.
    inside = (fr >= 0) & (fr < frames_n) & (r >= 0) & (r < rows) & (c >= 0) & (c < cols)
    fr, r, c, lbls = fr[inside], r[inside], c[inside], lbls[inside]

    # mark the center pixel of every detection with its label
    img_lbl[fr, r, c] = lbls

    # grow every marked pixel to a disk; frames without detections stay all-zero
    se = selem.disk(radius)
    for _fr in np.unique(fr):
        # structuring element is passed positionally: its keyword name differs
        # between scikit-image versions (selem / footprint)
        img_lbl[_fr] = dilation( img_lbl[_fr], se )
    return img_lbl
```
%% Cell type:markdown id: tags:
# Find raw data
- tif, xml
- The xml and the tif of one movie have exactly the same name; only the extension differs.
%% Cell type:code id: tags:
``` python
# Collect the raw movies (*.tif) and their annotations (*.xml); keep the file names only.
tif_names = { os.path.basename(p) for p in glob.glob( os.path.join(DATA_RAW_DIR,'*.tif') ) }
xml_names = sorted( os.path.basename(p) for p in glob.glob( os.path.join(DATA_RAW_DIR,'*.xml') ) )

# Build the two lists pair by pair so that xmls[i] always belongs to tifs[i].
# An xml without a movie is dropped from BOTH lists; otherwise zip() below (and in the
# generation cell) would silently pair every following xml with the wrong tif.
xmls, tifs_sorted = [], []
for xmlname in xml_names:
    # swap the extension only; str.replace would also touch 'xml' inside the name itself
    tifname = os.path.splitext(xmlname)[0] + '.tif'
    if tifname in tif_names:
        xmls.append( xmlname )
        tifs_sorted.append( tifname )
    else:
        print('Skipping %s: no matching tif found.'%xmlname)
tifs = tifs_sorted

for x,t in zip( xmls, tifs):
    print('%s \t %s'%(x,t))
```
%% Output
MICROTUBULE snr 1 density high.xml MICROTUBULE snr 1 density high.tif
MICROTUBULE snr 1 density low.xml MICROTUBULE snr 1 density low.tif
MICROTUBULE snr 1 density mid.xml MICROTUBULE snr 1 density mid.tif
MICROTUBULE snr 2 density high.xml MICROTUBULE snr 2 density high.tif
MICROTUBULE snr 2 density low.xml MICROTUBULE snr 2 density low.tif
MICROTUBULE snr 2 density mid.xml MICROTUBULE snr 2 density mid.tif
MICROTUBULE snr 4 density high.xml MICROTUBULE snr 4 density high.tif
MICROTUBULE snr 4 density low.xml MICROTUBULE snr 4 density low.tif
MICROTUBULE snr 4 density mid.xml MICROTUBULE snr 4 density mid.tif
MICROTUBULE snr 7 density high.xml MICROTUBULE snr 7 density high.tif
MICROTUBULE snr 7 density low.xml MICROTUBULE snr 7 density low.tif
MICROTUBULE snr 7 density mid.xml MICROTUBULE snr 7 density mid.tif
RECEPTOR snr 1 density high.xml RECEPTOR snr 1 density high.tif
RECEPTOR snr 1 density low.xml RECEPTOR snr 1 density low.tif
RECEPTOR snr 1 density mid.xml RECEPTOR snr 1 density mid.tif
RECEPTOR snr 2 density high.xml RECEPTOR snr 2 density high.tif
RECEPTOR snr 2 density low.xml RECEPTOR snr 2 density low.tif
RECEPTOR snr 2 density mid.xml RECEPTOR snr 2 density mid.tif
RECEPTOR snr 3 density high.xml RECEPTOR snr 3 density high.tif
RECEPTOR snr 3 density low.xml RECEPTOR snr 3 density low.tif
RECEPTOR snr 3 density mid.xml RECEPTOR snr 3 density mid.tif
RECEPTOR snr 4 density high.xml RECEPTOR snr 4 density high.tif
RECEPTOR snr 4 density low.xml RECEPTOR snr 4 density low.tif
RECEPTOR snr 4 density mid.xml RECEPTOR snr 4 density mid.tif
RECEPTOR snr 7 density high.xml RECEPTOR snr 7 density high.tif
RECEPTOR snr 7 density low.xml RECEPTOR snr 7 density low.tif
RECEPTOR snr 7 density mid.xml RECEPTOR snr 7 density mid.tif
VESICLE snr 1 density high.xml VESICLE snr 1 density high.tif
VESICLE snr 1 density low.xml VESICLE snr 1 density low.tif
VESICLE snr 1 density mid.xml VESICLE snr 1 density mid.tif
VESICLE snr 2 density high.xml VESICLE snr 2 density high.tif
VESICLE snr 2 density low.xml VESICLE snr 2 density low.tif
VESICLE snr 2 density mid.xml VESICLE snr 2 density mid.tif
VESICLE snr 4 density high.xml VESICLE snr 4 density high.tif
VESICLE snr 4 density low.xml VESICLE snr 4 density low.tif
VESICLE snr 4 density mid.xml VESICLE snr 4 density mid.tif
VESICLE snr 7 density high.xml VESICLE snr 7 density high.tif
VESICLE snr 7 density low.xml VESICLE snr 7 density low.tif
VESICLE snr 7 density mid.xml VESICLE snr 7 density mid.tif
VIRUS snr 1 density high.xml VIRUS snr 1 density high.tif
VIRUS snr 1 density low.xml VIRUS snr 1 density low.tif
VIRUS snr 1 density mid.xml VIRUS snr 1 density mid.tif
VIRUS snr 2 density high.xml VIRUS snr 2 density high.tif
VIRUS snr 2 density low.xml VIRUS snr 2 density low.tif
VIRUS snr 2 density mid.xml VIRUS snr 2 density mid.tif
VIRUS snr 4 density high.xml VIRUS snr 4 density high.tif
VIRUS snr 4 density low.xml VIRUS snr 4 density low.tif
VIRUS snr 4 density mid.xml VIRUS snr 4 density mid.tif
VIRUS snr 7 density high.xml VIRUS snr 7 density high.tif
VIRUS snr 7 density low.xml VIRUS snr 7 density low.tif
VIRUS snr 7 density mid.xml VIRUS snr 7 density mid.tif
%% Cell type:markdown id: tags:
# Generate label images
- read a pair of tif, xml
- convert xml to dataframe, dataframe to img_lbl.
- Store each frame of the tif and of img_lbl as a separate file: tif frames go to the `images` folder and img_lbl frames go to the `masks` folder.
- The resulting images/masks pairs are used for training a StarDist model.
%% Cell type:code id: tags:
``` python
# Each particle label is a disk of this radius:
R = 3

for xml_file, tif_file in zip(xmls, tifs):
    base_name = os.path.splitext(xml_file)[0]
    print(base_name, end='....')

    # load the movie and its annotations
    img = tifffile.imread( os.path.join(DATA_RAW_DIR, tif_file))
    df = read_xml_as_pands( os.path.join(DATA_RAW_DIR, xml_file) )

    # draw the annotations as a label stack of the same shape as the movie
    img_lbl = df_to_lbl( img, df, radius=R )

    # write one file per frame; image and mask of a frame share the same file name
    _fr = 0
    for _im, _lbl in zip(img, img_lbl):
        _fname = '%s_%05d.tif'%(base_name, _fr)
        img_abs_path = os.path.join( IMAGES_DIR, _fname )
        lbl_abs_path = os.path.join( MASKS_DIR, _fname )
        tifffile.imwrite(img_abs_path, data=_im )
        tifffile.imwrite(lbl_abs_path, data=_lbl )
        _fr += 1

    print(' DONE.')

print('Done all.')
```
%% Output
MICROTUBULE snr 1 density high.... DONE.
MICROTUBULE snr 1 density low.... DONE.
MICROTUBULE snr 1 density mid.... DONE.
MICROTUBULE snr 2 density high.... DONE.
MICROTUBULE snr 2 density low.... DONE.
MICROTUBULE snr 2 density mid.... DONE.
MICROTUBULE snr 4 density high.... DONE.
MICROTUBULE snr 4 density low.... DONE.
MICROTUBULE snr 4 density mid.... DONE.
MICROTUBULE snr 7 density high.... DONE.
MICROTUBULE snr 7 density low.... DONE.
MICROTUBULE snr 7 density mid.... DONE.
RECEPTOR snr 1 density high.... DONE.
RECEPTOR snr 1 density low.... DONE.
RECEPTOR snr 1 density mid.... DONE.
RECEPTOR snr 2 density high.... DONE.
RECEPTOR snr 2 density low.... DONE.
RECEPTOR snr 2 density mid.... DONE.
RECEPTOR snr 3 density high.... DONE.
RECEPTOR snr 3 density low.... DONE.
RECEPTOR snr 3 density mid.... DONE.
RECEPTOR snr 4 density high.... DONE.
RECEPTOR snr 4 density low.... DONE.
RECEPTOR snr 4 density mid.... DONE.
RECEPTOR snr 7 density high.... DONE.
RECEPTOR snr 7 density low.... DONE.
RECEPTOR snr 7 density mid.... DONE.
VESICLE snr 1 density high.... DONE.
VESICLE snr 1 density low.... DONE.
VESICLE snr 1 density mid.... DONE.
VESICLE snr 2 density high.... DONE.
VESICLE snr 2 density low.... DONE.
VESICLE snr 2 density mid.... DONE.
VESICLE snr 4 density high.... DONE.
VESICLE snr 4 density low.... DONE.
VESICLE snr 4 density mid.... DONE.
VESICLE snr 7 density high.... DONE.
VESICLE snr 7 density low.... DONE.
VESICLE snr 7 density mid.... DONE.
VIRUS snr 1 density high.... DONE.
VIRUS snr 1 density low.... DONE.
VIRUS snr 1 density mid.... DONE.
VIRUS snr 2 density high.... DONE.
VIRUS snr 2 density low.... DONE.
VIRUS snr 2 density mid.... DONE.
VIRUS snr 4 density high.... DONE.
VIRUS snr 4 density low.... DONE.
VIRUS snr 4 density mid.... DONE.
VIRUS snr 7 density high.... DONE.
VIRUS snr 7 density low.... DONE.
VIRUS snr 7 density mid.... DONE.
Done all.
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
### This notebook generates training data (images and label masks) from the original tif movies and their xml annotations
%% Cell type:code id: tags:
``` python
import os, sys
import xml.etree.ElementTree as ET
import pandas as pd
import tifffile
import matplotlib.pyplot as plt
import numpy as np
from skimage.morphology import dilation, selem
import glob
# Project layout: the notebook lives one level below the project root, the data in <root>/data.
ROOT_DIR = os.path.abspath( '..' )
DATA_DIR = os.path.join( ROOT_DIR, 'data' )

# Data set selection. Exactly one RAW/GEN pair must be active; the alternatives are kept
# commented out so that switching data sets is a matter of (un)commenting two lines.
# DATA_RAW_DIR = os.path.join( DATA_DIR, '1_raw_movies_and_xmls' )
# DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated' )
# DATA_RAW_DIR = os.path.join( DATA_DIR, '1_official_test_set' )
# DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated_from_official_set' )
DATA_RAW_DIR = os.path.join( DATA_DIR, '1_icy-benchmark-training_set' )
DATA_GEN_DIR = os.path.join(DATA_DIR, '2_generated_from_icy_benchmark_set' )

# Output folders: one file per frame is written to each of them.
IMAGES_DIR = os.path.join(DATA_GEN_DIR, 'images' )
MASKS_DIR = os.path.join(DATA_GEN_DIR, 'masks' )

for f in [IMAGES_DIR, MASKS_DIR]:
    if not os.path.exists(f):
        # exist_ok guards against the folder appearing between the check and the call
        os.makedirs(f, exist_ok=True)
        print('Created dir: %s'%f)
```
%% Cell type:code id: tags:
``` python
# lll = os.path.join(DATA_DIR, '2_generated_from_icy_benchmark_set/images' )
# lll = os.listdir( lll )
# lll = set( [l.split(' ')[0] for l in lll] )
# lll
# lll2 = os.path.join(DATA_DIR, '2_generated_from_official_set/images' )
# lll2 = os.listdir( lll2 )
# lll2 = set( [l.split(' ')[0] for l in lll2] )
# lll, lll2
```
%% Output
({'MICROTUBULE', 'RECEPTOR', 'VESICLE', 'VIRUS'},
{'MICROTUBULE', 'RECEPTOR', 'VESICLE'})
%% Cell type:code id: tags:
``` python
def read_xml_as_pands( xml_abs_path ):
    """
    Parse a particle-track xml file into a flat table of detections.

    Input
    -----
    xml_abs_path : str
        absolute path to xml file

    Return
    ------
    df : pandas.DataFrame
        One row per detection with the columns ['pid', 'fr', 'x', 'y', 'z'].
        'pid' is the position of the particle inside the first child of the xml root,
        'fr' is the detection's 't' attribute and 'x', 'y', 'z' are its coordinates.
        All columns are numeric.
    """
    # the particles are the children of the first element below the root
    particles = ET.parse( xml_abs_path ).getroot()[0]

    # Gather plain tuples first and build the DataFrame once at the end;
    # growing a DataFrame inside the loop would be much slower.
    records = []
    for pid, particle in enumerate(particles):
        records.extend(
            (pid, det.attrib['t'], det.attrib['x'], det.attrib['y'], det.attrib['z'])
            for det in particle
        )

    columns = ('pid', 'fr', 'x', 'y', 'z')
    df = pd.DataFrame(data={name: [rec[k] for rec in records] for k, name in enumerate(columns)})

    # the xml attributes are strings; convert every column to the smallest fitting numeric type
    for name in ('x', 'y', 'z'):
        df[name] = pd.to_numeric(df[name], downcast='float')
    for name in ('fr', 'pid'):
        df[name] = pd.to_numeric(df[name], downcast='integer')
    return df
def df_to_lbl( img, df, radius, label_offset=1 ):
    """
    Convert a DataFrame of particle detections into a stack of label images.

    Input
    -----
    img : numpy.ndarray
        Array representing time-lapse; img.shape = (frame, y, x). Only its shape is used.
    df : pandas.DataFrame
        df = ['pid', 'fr', 'x', 'y', 'z']; 'z' is ignored.
    radius : float
        radius of the particle label. Each particle is represented by a disk of this radius.
    label_offset : int
        Value added to 'pid' to obtain the label value. The default of 1 keeps the
        particle with pid 0 distinguishable from the background (0); pass 0 to get
        label == pid (particle 0 is then not drawn).

    Return
    ------
    img_lbl : numpy.ndarray
        uint16 array, img_lbl.shape = img.shape = (frame, y, x). Background is 0,
        the particle with id `pid` is drawn with the value `pid + label_offset`.
        Where disks overlap the larger label value wins (grey-value dilation).
        Detections that fall outside of the image or the frame range are skipped.

    Raises
    ------
    ValueError
        If the largest label value does not fit into an unsigned 16 bit image.
    """
    frames_n, rows, cols = img.shape
    img_lbl = np.zeros((frames_n, rows, cols), dtype=np.uint16)
    if len(df) == 0:
        # nothing to draw; also avoids max() of an empty column (NaN)
        return img_lbl

    id_max = int(df['pid'].max())
    if id_max + label_offset > np.iinfo(np.uint16).max:
        raise ValueError("Label image is coded to be unsigned 16bit; you have %d particles."%id_max)

    xs = np.asarray(df['x'], dtype=float)
    ys = np.asarray(df['y'], dtype=float)
    frs = np.asarray(df['fr'], dtype=float)
    lbls = np.asarray(df['pid'], dtype=np.int64) + label_offset

    # drop detections without usable coordinates before casting to integer indices
    finite = np.isfinite(xs) & np.isfinite(ys) & np.isfinite(frs)
    xs, ys, frs, lbls = xs[finite], ys[finite], frs[finite], lbls[finite]

    c = np.round(xs).astype(np.int64)
    r = np.round(ys).astype(np.int64)
    fr = frs.astype(np.int64)

    # Keep only detections inside the stack: a negative index would silently wrap around
    # to the opposite border and an index == size would raise IndexError.
    inside = (fr >= 0) & (fr < frames_n) & (r >= 0) & (r < rows) & (c >= 0) & (c < cols)
    fr, r, c, lbls = fr[inside], r[inside], c[inside], lbls[inside]

    # mark the center pixel of every detection with its label
    img_lbl[fr, r, c] = lbls

    # grow every marked pixel to a disk; frames without detections stay all-zero
    se = selem.disk(radius)
    for _fr in np.unique(fr):
        # structuring element is passed positionally: its keyword name differs
        # between scikit-image versions (selem / footprint)
        img_lbl[_fr] = dilation( img_lbl[_fr], se )
    return img_lbl
```
%% Cell type:markdown id: tags:
# Find raw data
- tif, xml
- The xml and the tif of one movie have exactly the same name; only the extension differs.
%% Cell type:code id: tags:
``` python
files = os.listdir(DATA_RAW_DIR)
tifs = [f for f in files if f.endswith('.tif')]
xmls = [f for f in files if f.endswith('.xml')]
# files = os.listdir(DATA_RAW_DIR)
# tifs = [f for f in files if f.endswith('.tif')]
# xmls = [f for f in files if f.endswith('.xml')]
tifs = glob.glob( os.path.join(DATA_RAW_DIR,'*.tif') )