diff --git a/Snakefile b/Snakefile
index 2194ab94c8287b6a5dd0e97cc3b80c8c2edd815e..e910ee81368b4fb4ef6d9f9fbfe18a618ae8764b 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,29 +1,38 @@
+# Merge the day1 and day2 count tables of a {folder} into one final table.
+rule combine_tables:
+    input:
+        table1="{folder}/day1/table.csv",
+        table2="{folder}/day2/table.csv"
+    output:
+        "{folder}/final_table.csv"
+    shell:
+        "python merge_tables.py {input.table1} {input.table2} {output}"
+
 rule align_and_count:
     input:
-        data="{folder}/BF_TRITC_maxZ.zarr",
-        concentrations="{folder}/../concentrations.yaml",
+        data="{folder}/day{day}/BF_TRITC_maxZ.zarr",
+        concentrations="{folder}/concentrations.yaml",
         template="template16_pad100-adj.tif",
         labels="labels_bin2+100.tif"
     output:
-        directory("{folder}/BF_TRITC_aligned.zarr")
-    log:
-        "{folder}/snakemake_align_n_count.log"
+        zarr = directory("{folder}/day{day}/BF_TRITC_aligned.zarr"),
+        table = "{folder}/day{day}/table.csv"
     shell:
-        "python align.py {input.data} {output} {input.concentrations} {input.template} {input.labels} 1"
+        "python align.py {input.data} {output.zarr} {input.concentrations} {input.template} {input.labels} {output.table} 1"
 
 
 rule align_and_count_2D:
     input:
-        data="{folder}/BF-TRITC-2D.zarr",
-        concentrations="{folder}/../concentrations.yaml",
+        data="{folder}/day{day}/BF-TRITC-2D.zarr",
+        concentrations="{folder}/concentrations.yaml",
         template="template16_pad100-adj.tif",
         labels="labels_bin2+100.tif"
     output:
-        directory("{folder}/BF_TRITC_2D_aligned.zarr")
-    log:
-        "{folder}/snakemake_align_n_count.log"
+        # Both outputs are named: a bare {output} expands to every output path.
+        zarr = directory("{folder}/day{day}/BF_TRITC_aligned.zarr"),
+        table = "{folder}/day{day}/table.csv"
     shell:
-        "python align.py {input.data} {output} {input.concentrations} {input.template} {input.labels} 1"
+        "python align.py {input.data} {output.zarr} {input.concentrations} {input.template} {input.labels} {output.table} 0"
 
 
 rule get_sizes_nd2:
@@ -36,10 +45,10 @@ rule get_sizes_nd2:
 
 rule combine_BF_TRITC_3D_maxZ:
     input:
-        bf="{folder}/BF-2D.zarr",
-        fluo="{folder}/TRITC-3D.zarr"
+        bf="{folder}/day{day}/BF-2D.zarr",
+        fluo="{folder}/day{day}/TRITC-3D.zarr"
     output:
-        directory("{folder}/BF_TRITC_maxZ.zarr")
+        directory("{folder}/day{day}/BF_TRITC_maxZ.zarr")
     shell:
         "python combine.py {input.bf} {input.fluo} {output}"
 
diff --git a/align.py b/align.py
index 0bbdd825d96dc1403096cc01169c6fd6b7187039..834899ce0f6ed344defb12b2c4e38ab4f3546b29 100644
--- a/align.py
+++ b/align.py
@@ -11,7 +11,7 @@ from multiprocessing import Pool
 import nd2
 import pandas as pd
 
-def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_path, labels_path, fit_poisson, nmax=10):
+def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_path, labels_path, table_path, fit_poisson, nmax=10):
     with open(concentrations_path, 'r') as f:
         concentrations_dct = (yaml.safe_load(f))
     concentrations = concentrations_dct['concentrations']
@@ -19,13 +19,8 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
     unit = concentrations_dct['units']
     tif_paths = [f'{os.path.dirname(BF_TRITC_2D_path)}/{c:02d}{unit}_aligned.tif' for c in concentrations]
     print('tif_paths: ', tif_paths)
-
-    if BF_TRITC_2D_path.endswith('.zarr'):
-        data = da.from_zarr(BF_TRITC_2D_path+'/0/')
-    elif BF_TRITC_2D_path.endswith('.nd2'):
-        data = nd2.ND2File(BF_TRITC_2D_path).to_dask()
-    print('data:', data)
 
+    data = read_dask(BF_TRITC_2D_path)
     template16 = tf.imread(template_path)
     big_labels = tf.imread(labels_path)
 
@@ -39,7 +34,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
 
     try:
         p=Pool(data.shape[0])
-        aligned = p.map(fun, zip(data, tif_paths, concentrations))
+        out = p.map(fun, zip(data, tif_paths, concentrations))
 
     except TypeError as e:
         print(f'Pool failed due to {e.args}')
@@ -49,7 +44,7 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
     aligned = [o['stack'] for o in out]
     counts = [o['counts'] for o in out]
     df = pd.concat(counts, ignore_index=True).sort_values(['[AB]','label'])
-    df.to_csv(os.path.join(os.path.dirname(out_path), 'table.csv'))
+    df.to_csv(table_path)
 
     daligned = da.from_array(np.array(aligned))
     convert.to_zarr(
@@ -62,6 +57,21 @@ def align_multichip(BF_TRITC_2D_path, out_path, concentrations_path, template_pa
     )
     return out_path
 
+def read_dask(path:str):
+    """Load the dataset at ``path`` through ``da.from_zarr`` or ``nd2``.
+
+    A path ending in ``.zarr`` is read from its ``/0/`` component; a path
+    ending in ``.nd2`` is opened with ``nd2.ND2File(path).to_dask()``.
+    Any other extension raises ``ValueError`` naming the offending path.
+    """
+    if path.endswith('.zarr'):
+        data = da.from_zarr(path+'/0/')
+    elif path.endswith('.nd2'):
+        data = nd2.ND2File(path).to_dask()
+    else:
+        raise ValueError(f'Unexpected file format for {path!r}, expected zarr or nd2')
+    print('data:', data)
+    return data
 
 def align_parallel(args, **kwargs):
     return align2D(*args, **kwargs)
diff --git a/merge_tables.py b/merge_tables.py
new file mode 100644
index 0000000000000000000000000000000000000000..aab0fe3267125da5a9357615e6a373dee9c7604b
--- /dev/null
+++ b/merge_tables.py
@@ -0,0 +1,15 @@
+import pandas as pd
+import fire
+
+def combine(table_day1_path, table_day2_path, output_path):
+    """Write the day-1 table with day-2 ``n_cells`` added as ``n_cells_final``.
+
+    Both CSV files are read with a fresh default index, so rows are paired
+    by position: the two tables must list the same rows in the same order.
+    """
+    day1, day2 = [pd.read_csv(t) for t in [table_day1_path, table_day2_path]]
+    day1.loc[:,'n_cells_final'] = day2.n_cells
+    day1.to_csv(output_path, index=False)
+
+if __name__ == "__main__":
+    fire.Fire(combine)
\ No newline at end of file