diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py index 70da58fc2aa5234ad6c33f2b20504c4bea105b64..a95f8807b0dae91e549dea22c4d36b132ff2647a 100644 --- a/libcodonusage/libcodonusage.py +++ b/libcodonusage/libcodonusage.py @@ -667,7 +667,9 @@ methionine (M) and tryptophan (W). all_nan_cols) -def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): +def codon_usage_pca( + usage_data, + figs_dir=None, hue="chrom", exclude_cols=None): """ Perform Principal Component Analysis on *usage_data*. @@ -689,10 +691,15 @@ def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): in the first four principal components (0 vs. 1 and 2 vs. 3) as well as graphics representing the influence of each data column on the first four principal components. + + If *exclude_cols* is not None, the columns with the names contained + in the iterable *exclude_cols* will not be included in the PCA analysis. """ if figs_dir is not None: figs_dir = Path(figs_dir) figs_dir.mkdir(parents=True, exist_ok=True) + if exclude_cols is not None: + usage_data = usage_data.drop(columns=exclude_cols) pca = PCA().fit(usage_data) transformed_data = pd.DataFrame( pca.transform(usage_data),