diff --git a/RNA_Seq_Cecere/RNA-seq.snakefile b/RNA_Seq_Cecere/RNA-seq.snakefile index fd6421f59e7cd3fd2c727cbd3cd8da3876da0d7a..0ee820e0265319ce50b2971ead25243e39b7149b 100644 --- a/RNA_Seq_Cecere/RNA-seq.snakefile +++ b/RNA_Seq_Cecere/RNA-seq.snakefile @@ -1396,7 +1396,9 @@ rule plot_spikein_responses: output: response_plots = expand(OPJ(aligner, f"mapped_{genome}", "{{counter}}", f"all_on_{genome}", "{lib}_{rep}_spike_ins_{{orientation}}_TPM_response.pdf"), lib=LIBS, rep=REPS), - # spikein_slope_files = OPJ(aligner, f"mapped_{genome}", "{counter}", f"all_on_{genome}", "spike_ins_{orientation}_TPM_response_slope.txt"), + spikein_slope_files = OPJ( + aligner, f"mapped_{genome}", "{counter}", f"all_on_{genome}", + "spike_ins_{orientation}_TPM_response_slope_and_intercept.txt"), run: tpm_table = pd.read_table(input.tpm_file, index_col="gene") common = tpm_table.index.intersection(spikein_expected_counts.index) @@ -1410,6 +1412,8 @@ rule plot_spikein_responses: # plot_spikein_response, data, lib, # title="spike-ins TPM response") # pp.close() + slopes = {} + intercepts = {} rel_sq_diffs_list = [] for libname in data.columns[:-1]: filename = OPJ( @@ -1433,6 +1437,14 @@ rule plot_spikein_responses: # intercept supposed to be 0 for spikein response data, "expected_counts", libname, y_min=1, fit_intercept=False, transform="log10") rel_sq_diffs_list.append(rel_squared_diffs) + slopes[libname] = regline_slope + intercepts[libname] = regline_intercept + reg_params = pd.concat( + [pd.Series(slopes), pd.Series(intercepts)], + axis=1) + reg_params.columns = ["slope", "intercept"] + reg_params.to_csv(output.spikein_slope_files, sep="\t") + # TODO (28/10/2022): The following looks like old work in progress: all_rel_sq_diffs = pd.concat(rel_sq_diffs_list, axis=1).dropna() # print(all_rel_sq_diffs) #all_rel_sq_diffs.assign(p_normal=normaltest(all_rel_sq_diffs, axis=1, nan_policy="omit").pvalue)