Skip to content
Snippets Groups Projects
Commit 64894825 authored by Blaise Li
Browse files

Option to exclude columns for PCA.

parent 6530db56
No related branches found
No related tags found
No related merge requests found
...@@ -667,7 +667,9 @@ methionine (M) and tryptophan (W). ...@@ -667,7 +667,9 @@ methionine (M) and tryptophan (W).
all_nan_cols) all_nan_cols)
def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): def codon_usage_pca(
usage_data,
figs_dir=None, hue="chrom", exclude_cols=None):
""" """
Perform Principal Component Analysis on *usage_data*. Perform Principal Component Analysis on *usage_data*.
...@@ -689,10 +691,15 @@ def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"): ...@@ -689,10 +691,15 @@ def codon_usage_pca(usage_data, figs_dir=None, hue="chrom"):
in the first four principal components (0 vs. 1 and 2 vs. 3) in the first four principal components (0 vs. 1 and 2 vs. 3)
as well as graphics representing the influence of each data column as well as graphics representing the influence of each data column
on the first four principal components. on the first four principal components.
If *exclude_cols* is not None, the columns with the names contained
in the iterable *exclude_cols* will not be included in the PCA analysis.
""" """
if figs_dir is not None: if figs_dir is not None:
figs_dir = Path(figs_dir) figs_dir = Path(figs_dir)
figs_dir.mkdir(parents=True, exist_ok=True) figs_dir.mkdir(parents=True, exist_ok=True)
if exclude_cols is not None:
usage_data = usage_data.drop(columns=exclude_cols)
pca = PCA().fit(usage_data) pca = PCA().fit(usage_data)
transformed_data = pd.DataFrame( transformed_data = pd.DataFrame(
pca.transform(usage_data), pca.transform(usage_data),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment