Skip to content
Snippets Groups Projects
Commit 793760c9 authored by Andrey Aristov's avatar Andrey Aristov
Browse files

merge tables

parent 76498128
Branches
Tags
No related merge requests found
# Merge the per-day tables of one experiment folder into a single table:
# passes {folder}/day1/table.csv and {folder}/day2/table.csv to
# merge_tables.py, which writes {folder}/final_table.csv.
rule combine_tables:
input:
table1="{folder}/day1/table.csv",
table2="{folder}/day2/table.csv"
output:
"{folder}/final_table.csv"
shell:
# Argument order is: day-1 table, day-2 table, output path.
"python merge_tables.py {input.table1} {input.table2} {output}"
rule align_and_count: rule align_and_count:
input: input:
data="{folder}/BF_TRITC_maxZ.zarr", data="{folder}/day{day}/BF_TRITC_maxZ.zarr",
concentrations="{folder}/../concentrations.yaml", concentrations="{folder}/concentrations.yaml",
template="template16_pad100-adj.tif", template="template16_pad100-adj.tif",
labels="labels_bin2+100.tif" labels="labels_bin2+100.tif"
output: output:
directory("{folder}/BF_TRITC_aligned.zarr") zarr = directory("{folder}/day{day}/BF_TRITC_aligned.zarr"),
log: table = "{folder}/day{day}/table.csv"
"{folder}/snakemake_align_n_count.log"
shell: shell:
"python align.py {input.data} {output} {input.concentrations} {input.template} {input.labels} 1" "python align.py {input.data} {output.zarr} {input.concentrations} {input.template} {input.labels} {output.table} 1"
rule align_and_count_2D: rule align_and_count_2D:
input: input:
data="{folder}/BF-TRITC-2D.zarr", data="{folder}/day{day}/BF-TRITC-2D.zarr",
concentrations="{folder}/../concentrations.yaml", concentrations="{folder}/concentrations.yaml",
template="template16_pad100-adj.tif", template="template16_pad100-adj.tif",
labels="labels_bin2+100.tif" labels="labels_bin2+100.tif"
output: output:
directory("{folder}/BF_TRITC_2D_aligned.zarr") directory("{folder}/day{day}/BF_TRITC_aligned.zarr"),
log: table = "{folder}/day{day}/table.csv"
"{folder}/snakemake_align_n_count.log"
shell: shell:
"python align.py {input.data} {output} {input.concentrations} {input.template} {input.labels} 1" "python align.py {input.data} {output} {input.concentrations} {input.template} {input.labels} {output.table} 0"
rule get_sizes_nd2: rule get_sizes_nd2:
...@@ -36,10 +43,10 @@ rule get_sizes_nd2: ...@@ -36,10 +43,10 @@ rule get_sizes_nd2:
rule combine_BF_TRITC_3D_maxZ: rule combine_BF_TRITC_3D_maxZ:
input: input:
bf="{folder}/BF-2D.zarr", bf="{folder}/day{day}/BF-2D.zarr",
fluo="{folder}/TRITC-3D.zarr" fluo="{folder}/day{day}/TRITC-3D.zarr"
output: output:
directory("{folder}/BF_TRITC_maxZ.zarr") directory("{folder}/day{day}/BF_TRITC_maxZ.zarr")
shell: shell:
"python combine.py {input.bf} {input.fluo} {output}" "python combine.py {input.bf} {input.fluo} {output}"
......
...@@ -11,7 +11,7 @@ from multiprocessing import Pool ...@@ -11,7 +11,7 @@ from multiprocessing import Pool
import nd2 import nd2
import pandas as pd import pandas as pd
def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_path, labels_path, fit_poisson, nmax=10): def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_path, labels_path, table_path, fit_poisson, nmax=10):
with open(concentrations_path, 'r') as f: with open(concentrations_path, 'r') as f:
concentrations_dct = (yaml.safe_load(f)) concentrations_dct = (yaml.safe_load(f))
concentrations = concentrations_dct['concentrations'] concentrations = concentrations_dct['concentrations']
...@@ -19,13 +19,8 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa ...@@ -19,13 +19,8 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
unit = concentrations_dct['units'] unit = concentrations_dct['units']
tif_paths = [f'{os.path.dirname(BF_TRITC_2D_path)}/{c:02d}{unit}_aligned.tif' for c in concentrations] tif_paths = [f'{os.path.dirname(BF_TRITC_2D_path)}/{c:02d}{unit}_aligned.tif' for c in concentrations]
print('tif_paths: ', tif_paths) print('tif_paths: ', tif_paths)
if BF_TRITC_2D_path.endswith('.zarr'):
data = da.from_zarr(BF_TRITC_2D_path+'/0/')
elif BF_TRITC_2D_path.endswith('.nd2'):
data = nd2.ND2File(BF_TRITC_2D_path).to_dask()
print('data:', data)
data = read_dask(BF_TRITC_2D_path)
template16 = tf.imread(template_path) template16 = tf.imread(template_path)
big_labels = tf.imread(labels_path) big_labels = tf.imread(labels_path)
...@@ -39,7 +34,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa ...@@ -39,7 +34,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
try: try:
p=Pool(data.shape[0]) p=Pool(data.shape[0])
aligned = p.map(fun, zip(data, tif_paths, concentrations)) out = p.map(fun, zip(data, tif_paths, concentrations))
except TypeError as e: except TypeError as e:
print(f'Pool failed due to {e.args}') print(f'Pool failed due to {e.args}')
...@@ -49,7 +44,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa ...@@ -49,7 +44,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
aligned = [o['stack'] for o in out] aligned = [o['stack'] for o in out]
counts = [o['counts'] for o in out] counts = [o['counts'] for o in out]
df = pd.concat(counts, ignore_index=True).sort_values(['[AB]','label']) df = pd.concat(counts, ignore_index=True).sort_values(['[AB]','label'])
df.to_csv(os.path.join(os.path.dirname(out_path), 'table.csv')) df.to_csv(table_path)
daligned = da.from_array(np.array(aligned)) daligned = da.from_array(np.array(aligned))
convert.to_zarr( convert.to_zarr(
...@@ -62,6 +57,15 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa ...@@ -62,6 +57,15 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
) )
return out_path return out_path
def read_dask(path: str):
    """Open an image dataset as a dask array, dispatching on file extension.

    * ``.zarr`` -- reads the array stored under the ``0`` sub-path of the
      store with ``da.from_zarr``.
    * ``.nd2`` -- opens the file with ``nd2.ND2File`` and converts it with
      ``to_dask()``.

    The loaded array is printed before being returned.

    Args:
        path: Path to the dataset; must end in ``.zarr`` or ``.nd2``.

    Returns:
        The array produced by the matching reader.

    Raises:
        ValueError: If ``path`` ends in neither ``.zarr`` nor ``.nd2``.
    """
    if path.endswith('.zarr'):
        data = da.from_zarr(path + '/0/')
    elif path.endswith('.nd2'):
        data = nd2.ND2File(path).to_dask()
    else:
        # Name the offending path so a failed pipeline run can be traced
        # back to the input that caused it.
        raise ValueError(
            f'Unexpected file format, expected zarr or nd2, got {path!r}'
        )
    print('data:', data)
    return data
def align_parallel(args, **kwargs): def align_parallel(args, **kwargs):
return align2D(*args, **kwargs) return align2D(*args, **kwargs)
......
import pandas as pd
import fire
def combine(table_day1_path, table_day2_path, output_path):
    """Attach the day-2 cell counts to the day-1 table and save the result.

    Both CSV files are read with ``pd.read_csv``. The ``n_cells`` column of
    the day-2 table is copied into a new ``n_cells_final`` column of the
    day-1 table, which is then written to ``output_path`` without an index
    column.

    Rows are paired through the default integer index that ``read_csv``
    assigns, i.e. by their position in the files, not by any key column.
    A day-1 row with no day-2 row at the same position gets NaN.

    Args:
        table_day1_path: CSV file with the day-1 table.
        table_day2_path: CSV file with the day-2 table; must contain an
            ``n_cells`` column.
        output_path: Destination CSV file.

    Raises:
        KeyError: If the day-2 table has no ``n_cells`` column.
    """
    day1 = pd.read_csv(table_day1_path)
    day2 = pd.read_csv(table_day2_path)
    # Bracket access always means "column"; attribute access can be shadowed
    # by DataFrame attributes and fails with a less specific error.
    day1['n_cells_final'] = day2['n_cells']
    # index=False is the documented way to omit the index column.
    day1.to_csv(output_path, index=False)


# Expose combine() as a command-line entry point through python-fire.
if __name__ == "__main__":
    fire.Fire(combine)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment