Skip to content
Snippets Groups Projects
Commit 01da51a2 authored by Blaise Li's avatar Blaise Li
Browse files

Fold boxplots for different biotypes.

Plotting 5UTR, CDS and 3UTR on the same graph can help discover
interesting behaviours.
parent b708b9cd
No related branches found
No related tags found
No related merge requests found
......@@ -610,9 +610,20 @@ de_fold_heatmaps = []
ip_fold_boxplots = []
# Temporary, until used for boxplots:
remapped_folds = []
remapped_fold_boxplots = []
if contrasts_dict["ip"]:
remapped_fold_boxplots = expand(
OPJ(output_dir, "figures", "{contrast}", "by_biotypes",
"{contrast}_{read_type}_on_%s_{biotypes}_{orientation}_transcript_{fold_type}_{gene_list}_boxplots.{fig_format}" % genome),
contrast=IP_CONTRASTS,
read_type=READ_TYPES_FOR_COUNTING,
# TODO: Read this from config file
biotypes=["protein_coding_5UTR_and_protein_coding_CDS_and_protein_coding_3UTR"],
orientation=ORIENTATIONS,
fold_type=["mean_log2_RPM_fold"],
gene_list=BOXPLOT_GENE_LISTS, fig_format=FIG_FORMATS)
remapped_folds = expand(
OPJ(mapping_dir, f"RPM_folds_{size_selected}", "all", "{counted_type}_mean_log2_RPM_fold.txt"),
OPJ(mapping_dir, f"RPM_folds_{size_selected}", "all", "remapped", "{counted_type}_mean_log2_RPM_fold.txt"),
counted_type=REMAPPED_COUNTED),
# snakemake -n OK
# expand(
......@@ -732,6 +743,7 @@ rule all:
exploratory_graphs,
remapped_folds,
fold_boxplots,
remapped_fold_boxplots,
#expand(OPJ(output_dir, "figures", "{small_type}_unit_clustermap.{fig_format}"), small_type=SMALL_TYPES, fig_format=FIG_FORMATS),
#expand(OPJ(
# feature_counts_dir,
......@@ -1834,7 +1846,7 @@ rule compute_remapped_RPM_folds:
output:
fold_results = OPJ(
mapping_dir, f"RPM_folds_{size_selected}",
"{contrast}", "{contrast}_{read_type}_on_%s_{biotype}_{orientation}_{feature_type}_RPM_folds.txt" % genome),
"{contrast}", "remapped", "{contrast}_{read_type}_on_%s_{biotype}_{orientation}_{feature_type}_RPM_folds.txt" % genome),
log:
log = OPJ(log_dir, "compute_RPM_folds", "{contrast}_{read_type}_on_%s_{biotype}_{orientation}_transcript.log" % genome),
run:
......@@ -1886,10 +1898,10 @@ rule gather_remapped_RPM_folds:
input:
fold_results = expand(OPJ(
mapping_dir, f"RPM_folds_{size_selected}",
"{contrast}", "{contrast}_{{read_type}}_on_%s_{{biotype}}_{{orientation}}_transcript_RPM_folds.txt" % genome), contrast=IP_CONTRASTS),
"{contrast}", "remapped", "{contrast}_{{read_type}}_on_%s_{{biotype}}_{{orientation}}_transcript_RPM_folds.txt" % genome), contrast=IP_CONTRASTS),
output:
all_folds = OPJ(
mapping_dir, f"RPM_folds_{size_selected}", "all", "{read_type}_on_%s_{biotype}_{orientation}_transcript_mean_log2_RPM_fold.txt" % genome),
mapping_dir, f"RPM_folds_{size_selected}", "all", "remapped", "{read_type}_on_%s_{biotype}_{orientation}_transcript_mean_log2_RPM_fold.txt" % genome),
log:
log = OPJ(log_dir, "gather_RPM_folds", "{read_type}_on_%s_{biotype}_{orientation}_transcript.log" % genome),
run:
......@@ -1897,7 +1909,7 @@ rule gather_remapped_RPM_folds:
# OPJ(mapping_dir, f"RPM_folds_{size_selected}",
# f"{contrast}", f"{contrast}_{wildcards.counted_type}_RPM_folds.txt"),
OPJ(mapping_dir, f"RPM_folds_{size_selected}",
f"{contrast}", f"{contrast}_{wildcards.read_type}_on_{genome}_{wildcards.biotype}_{wildcards.orientation}_transcript_RPM_folds.txt"),
f"{contrast}", "remapped", f"{contrast}_{wildcards.read_type}_on_{genome}_{wildcards.biotype}_{wildcards.orientation}_transcript_RPM_folds.txt"),
index_col=["gene", "cosmid", "name"],
usecols=["gene", "cosmid", "name", "mean_log2_RPM_fold"])["mean_log2_RPM_fold"] for contrast in IP_CONTRASTS}).to_csv(
output.all_folds, sep="\t")
......@@ -1905,7 +1917,9 @@ rule gather_remapped_RPM_folds:
# TODO add remapped_RPM_folds
def source_gathered_folds(wildcards):
if wildcards.fold_type == "mean_log2_RPM_fold":
if hasattr(wildcards, "counted_type"):
return rules.gather_remapped_RPM_folds.output.all_folds
elif wildcards.fold_type == "mean_log2_RPM_fold":
return rules.gather_RPM_folds.output.all_folds
else:
return rules.gather_DE_folds.output.all_folds
......@@ -3340,6 +3354,7 @@ def source_fold_data(wildcards):
return rules.small_RNA_differential_expression.output.counts_and_res
elif fold_type == "mean_log2_RPM_fold":
if hasattr(wildcards, "contrast_type"):
# We want all contrasts of this type.
#https://stackoverflow.com/a/26791923/1878788
#print("{0.small_type}".format(wildcards))
return [filename.format(wildcards) for filename in expand(
......@@ -3347,6 +3362,13 @@ def source_fold_data(wildcards):
"RPM_folds_%s" % size_selected, "{contrast}",
"{contrast}_{{0.small_type}}_RPM_folds.txt"),
contrast=contrasts_dict[wildcards.contrast_type])]
elif hasattr(wildcards, "biotypes"):
return [filename.format(wildcards) for filename in expand(
OPJ(mapping_dir,
"RPM_folds_%s" % size_selected,
"{{0.contrast}}", "remapped",
"{{0.contrast}}_{{0.read_type}}_on_%s_{biotype}_{{0.orientation}}_transcript_RPM_folds.txt" % genome),
biotype=wildcards.biotypes.split("_and_"))]
else:
return rules.compute_RPM_folds.output.fold_results
else:
......@@ -3382,6 +3404,7 @@ rule make_gene_list_lfc_boxplots:
rule make_contrast_lfc_boxplots:
"""For a given gene list, make one box for each contrast."""
input:
data = source_fold_data,
output:
......@@ -3406,6 +3429,31 @@ rule make_contrast_lfc_boxplots:
title=f"{wildcards.small_type} folds for {wildcards.contrast_type} contrasts")
rule make_biotype_lfc_boxplots:
"""For a given gene list, make one box for each contrast."""
input:
data = source_fold_data,
output:
boxplots = OPJ(output_dir, "figures", "{contrast}", "by_biotypes",
"{contrast}_{read_type}_on_%s_{biotypes}_{orientation}_transcript_{fold_type}_{gene_list}_boxplots.{fig_format}" % genome)
params:
id_lists = set_id_lists,
#shell:
# """touch {output.boxplots}"""
run:
lfcs = pd.DataFrame(
{f"{biotype}_{list_name}" : pd.read_table(filename, index_col="gene").loc[
set(id_list)]["mean_log2_RPM_fold"] for (
biotype, filename) in zip(wildcards.biotypes.split("_and_"), input.data) for (
list_name, id_list) in params.id_lists.items()})
save_plot(
output.boxplots,
plot_boxplots,
lfcs,
wildcards.fold_type,
title=f"{wildcards.read_type} folds for {wildcards.biotypes} contrasts")
##########################
# Read sequence analyses #
##########################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment