From 36306d05cfe3b3148a265c29a0dc897781baa82f Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Wed, 22 May 2019 17:03:37 +0200
Subject: [PATCH] Added snakemake wrappers to compute RPK and TPM.

---
 snakemake_wrappers/compute_RPK/wrapper.py | 7 +++++++
 snakemake_wrappers/compute_TPM/wrapper.py | 5 +++++
 2 files changed, 12 insertions(+)
 create mode 100644 snakemake_wrappers/compute_RPK/wrapper.py
 create mode 100644 snakemake_wrappers/compute_TPM/wrapper.py

diff --git a/snakemake_wrappers/compute_RPK/wrapper.py b/snakemake_wrappers/compute_RPK/wrapper.py
new file mode 100644
index 0000000..f3a4218
--- /dev/null
+++ b/snakemake_wrappers/compute_RPK/wrapper.py
@@ -0,0 +1,7 @@
+import pandas as pd
+# Compute RPK = 1000 * count / "union_exon_len" for genes found in both tables; `snakemake` is not defined here (presumably injected by Snakemake - confirm).
+counts_data = pd.read_table(snakemake.input.counts_data, index_col="gene")  # tab-separated table indexed by its "gene" column
+feature_lengths = pd.read_table(snakemake.params.feature_lengths_file, index_col="gene")  # must contain a "union_exon_len" column (assumed to be in bases - confirm)
+common = counts_data.index.intersection(feature_lengths.index)  # genes missing from either table are left out of the output
+rpk = 1000 * counts_data.loc[common].div(feature_lengths.loc[common]["union_exon_len"], axis="index")  # divides every column row-wise by the gene's length
+rpk.to_csv(snakemake.output.rpk_file, sep="\t")  # written tab-separated, with the gene index as first column
diff --git a/snakemake_wrappers/compute_TPM/wrapper.py b/snakemake_wrappers/compute_TPM/wrapper.py
new file mode 100644
index 0000000..f07c15a
--- /dev/null
+++ b/snakemake_wrappers/compute_TPM/wrapper.py
@@ -0,0 +1,5 @@
+import pandas as pd
+# Rescale each column of the RPK table to a total of one million (TPM); `snakemake` is not defined here (presumably injected by Snakemake - confirm).
+rpk = pd.read_table(snakemake.input.rpk_file, index_col="gene")  # tab-separated table indexed by its "gene" column
+tpm = 1000000 * rpk / rpk.sum()  # rpk.sum() gives one total per column; a column whose total is 0 yields NaN/inf values
+tpm.to_csv(snakemake.output.tpm_file, sep="\t")  # written tab-separated, with the gene index as first column
-- 
GitLab