From 648948253f9f14ca0626c3cf5e9001dbc7dbc39a Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Fri, 29 Apr 2022 17:08:47 +0200
Subject: [PATCH] Option to exclude columns for PCA.

---
 libcodonusage/libcodonusage.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 70da58f..a95f880 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -667,7 +667,9 @@ methionine (M) and tryptophan (W).
         all_nan_cols)
 
 
-def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"):
+def codon_usage_pca(
+        usage_data,
+        figs_dir=None, hue="chrom", exclude_cols=None):
     """
     Perform Principal Component Analysis on *usage_data*.
 
@@ -689,10 +691,15 @@ def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"):
     in the first four principal components (0 vs. 1 and 2 vs. 3)
     as well as graphics representing the influence of each data column
     on the first four principal components.
+
+    If *exclude_cols* is not None, the columns whose names are contained
+    in the iterable *exclude_cols* will be excluded from the PCA.
     """
     if figs_dir is not None:
         figs_dir = Path(figs_dir)
         figs_dir.mkdir(parents=True, exist_ok=True)
+    if exclude_cols is not None:
+        usage_data = usage_data.drop(columns=exclude_cols)
     pca = PCA().fit(usage_data)
     transformed_data = pd.DataFrame(
         pca.transform(usage_data),
-- 
GitLab