diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile index 1be3459772c0f7185ddce39605a35140f4878f1d..aad860eab8a004abaa912f397065f10932c23707 100644 --- a/small_RNA-seq/small_RNA-seq.snakefile +++ b/small_RNA-seq/small_RNA-seq.snakefile @@ -2564,6 +2564,35 @@ if False: usecols=["gene", "cosmid", "name", "small_type", "mean_log2_RPM_fold"])["mean_log2_RPM_fold"] for contrast in IP_CONTRASTS}).to_csv( output.all_folds, sep="\t") +#rule gather_RPM_folds: +# """Gathers RPM folds across contrasts.""" +# input: +# fold_results = expand(OPJ( +# mapping_dir, "RPM_by_{{norm}}_folds_%s" % size_selected, +# "{contrast}", "{contrast}_{{small_type}}_RPM_by_{{norm}}_folds.txt"), contrast=IP_CONTRASTS), +# output: +# all_folds = OPJ( +# mapping_dir, "RPM_by_{norm}_folds_%s" % size_selected, "all", "{small_type}_mean_log2_RPM_by_{norm}_fold.txt"), +# # wildcard_constraints: +# # small_type="si|siu|sisiu|all_si|all_siu|all_sisiu|%s"% "|".join(SMALL_TYPES + JOINED_SMALL_TYPES), +# log: +# log = OPJ(log_dir, "gather_RPM_by_{norm}_folds", "{small_type}.log"), +# benchmark: +# OPJ(log_dir, "gather_RPM_by_{norm}_folds", "{small_type}_benchmark.txt"), +# run: +# with open(log.log, "w") as logfile: +# logfile.write(f"Debug: input\n{input}\n") +# actual_input = [ +# OPJ(mapping_dir, f"RPM_by_{wildcards.norm}_folds_{size_selected}", +# f"{contrast}", f"{contrast}_{wildcards.small_type}_RPM_by_{wildcards.norm}_folds.txt") for contrast in IP_CONTRASTS] +# logfile.write(f"Gathering RPM folds from:\n{actual_input}\nShould be from:\n{input.fold_results}\n") +# pd.DataFrame({contrast : pd.read_table( +# OPJ(mapping_dir, f"RPM_by_{wildcards.norm}_folds_{size_selected}", +# f"{contrast}", f"{contrast}_{wildcards.small_type}_RPM_by_{wildcards.norm}_folds.txt"), +# index_col=["gene", "cosmid", "name", "small_type"], +# usecols=["gene", "cosmid", "name", "small_type", "mean_log2_RPM_fold"])["mean_log2_RPM_fold"] for contrast in IP_CONTRASTS}).to_csv( +# output.all_folds, sep="\t") + rule gather_RPM_folds: """Gathers RPM folds across contrasts.""" input: @@ -2774,8 +2803,11 @@ rule gather_remapped_RPM_folds: def source_gathered_folds(wildcards): if hasattr(wildcards, "counted_type"): return rules.gather_remapped_RPM_folds.output.all_folds - elif wildcards.fold_type in RPM_FOLD_TYPES: #elif wildcards.fold_type == "mean_log2_RPM_fold": + elif wildcards.fold_type in RPM_FOLD_TYPES: + assert wildcards.fold_type.startswith("mean_log2_RPM_by_") + assert wildcards.fold_type.endswith("_fold") + wildcards.norm = wildcards.fold_type[len("mean_log2_RPM_by_"):-len("_fold")] return rules.gather_RPM_folds.output.all_folds else: return rules.gather_DE_folds.output.all_folds