From 648948253f9f14ca0626c3cf5e9001dbc7dbc39a Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Fri, 29 Apr 2022 17:08:47 +0200 Subject: [PATCH] Option to exclude columns for PCA. --- libcodonusage/libcodonusage.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 70da58f..a95f880 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -667,7 +667,9 @@ methionine (M) and tryptophan (W). all_nan_cols) -def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): +def codon_usage_pca( + usage_data, + figs_dir=None, hue="chrom", exclude_cols=None): """ Perform Principal Component Analysis on *usage_data*. @@ -689,10 +691,15 @@ def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): in the first four principal components (0 vs. 1 and 2 vs. 3) as well as graphics representing the influence of each data column on the first four principal components. + + If *exclude_cols* is not None, the columns with the names contained + in the iterable *exclude_cols* will not be included in the PCA analysis. """ if figs_dir is not None: figs_dir = Path(figs_dir) figs_dir.mkdir(parents=True, exist_ok=True) + if exclude_cols is not None: + usage_data = usage_data.drop(columns=exclude_cols) pca = PCA().fit(usage_data) transformed_data = pd.DataFrame( pca.transform(usage_data), -- GitLab