Skip to content
Snippets Groups Projects
Commit 8a5a8fbd authored by manselme
Browse files

plot more components in PCA

parent 2a8b9218
Branches
No related tags found
No related merge requests found
# Package metadata: copyright notice, licence name and version string.
# NOTE(review): the licence attribute is spelled `__licence__` here; code
# looking for the more common `__license__` spelling will not find it.
__copyright__ = "Copyright (C) 2022-2023 Blaise Li"
__licence__ = "GNU GPLv3"
__version__ = "0.28.1"
from .libcodonusage import (
aa2colour,
aa_usage,
by_aa_codon_usage,
centroid_usage,
codon2aa,
codon_usage_pca,
columns_by_aa,
compare_clusterings,
compute_rscu,
detect_fishy_genes,
exclude_all_nan_cols,
extract_top_genes_from_cluster,
filter_on_idx_levels,
find_most_biased_genes,
find_valley,
format_codon_labels,
gene_wide_codon_usage,
load_bias_table,
load_counts_table,
load_table_with_info_index,
make_aa_codon_columns,
make_cluster_table,
make_centroids_cluster_finder,
make_counts_only,
plot_codon_usage_for_gene_list,
render_md,
save_counts_table,
sort_counts_by_aa,
split_info_index,
star2stop,
to_long_form,
violin_usage,
violin_usage_vertical,
violin_usage_by_clusters,
violin_usage_by_clusters_splitby,
violin_with_thresh,
write_cluster_lists,
)
This diff is collapsed.
...@@ -14,10 +14,12 @@ ...@@ -14,10 +14,12 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Functions used in Jupyter notebooks.""" """Functions used in Jupyter notebooks."""
from http.cookies import Morsel
from itertools import combinations from itertools import combinations
import json import json
from operator import attrgetter, itemgetter from operator import attrgetter, itemgetter
from pathlib import Path from pathlib import Path
from unittest import mock
# python3 -m pip install cytoolz # python3 -m pip install cytoolz
from cytoolz import concat, groupby, unique, valmap from cytoolz import concat, groupby, unique, valmap
# To render markdown in a Jupyter notebook on gitlab # To render markdown in a Jupyter notebook on gitlab
...@@ -139,6 +141,7 @@ def load_counts_table( ...@@ -139,6 +141,7 @@ def load_counts_table(
display(codon_counts.head(3)) display(codon_counts.head(3))
return codon_counts return codon_counts
#def filter_codon_counts_table(codon_counts, )
def compute_criteria(codon_counts): def compute_criteria(codon_counts):
""" """
...@@ -828,7 +831,7 @@ methionine (M) and tryptophan (W). ...@@ -828,7 +831,7 @@ methionine (M) and tryptophan (W).
def codon_influence_in_components( def codon_influence_in_components(
components, colnames, components, colnames,
figs_dir=None, formats=None): figs_dir=None, more_components=False, formats=None):
""" """
Plot the influence of the columns in the first 4 principal axes of a PCA. Plot the influence of the columns in the first 4 principal axes of a PCA.
...@@ -854,8 +857,11 @@ def codon_influence_in_components( ...@@ -854,8 +857,11 @@ def codon_influence_in_components(
be saved, such as "svg" or "png". be saved, such as "svg" or "png".
""" """
render_md( render_md(
"Vizualizing the influence of codons in the first 4 components\n") "Vizualizing the influence of codons in the first components\n")
# TODO: *figsize* could be adapted depending on the number of columns # TODO: *figsize* could be adapted depending on the number of columns
if more_components:
(fig, axes) = plt.subplots(12, 1, figsize=(16, 60))
else:
(fig, axes) = plt.subplots(4, 1, figsize=(16, 16)) (fig, axes) = plt.subplots(4, 1, figsize=(16, 16))
for (component, axis) in enumerate(axes): for (component, axis) in enumerate(axes):
pd.Series( pd.Series(
...@@ -880,7 +886,7 @@ def codon_influence_in_components( ...@@ -880,7 +886,7 @@ def codon_influence_in_components(
def codon_usage_pca( def codon_usage_pca(
usage_data, usage_data,
figs_dir=None, hue="chrom", exclude_cols=None, figs_dir=None, hue="chrom", exclude_cols=None, plot_more_components=False,
formats=None, cols_are_codons=True): formats=None, cols_are_codons=True):
""" """
Perform Principal Component Analysis on *usage_data*. Perform Principal Component Analysis on *usage_data*.
...@@ -922,14 +928,36 @@ def codon_usage_pca( ...@@ -922,14 +928,36 @@ def codon_usage_pca(
pca.transform(usage_data), pca.transform(usage_data),
index=usage_data.index).reset_index(level=hue) index=usage_data.index).reset_index(level=hue)
render_md( render_md(
"Plotting genes on the first 4 components\n") "Plotting genes on the first components\n")
if plot_more_components:
(fig, axes) = plt.subplots(3, 2, figsize=(16, 25))
sns.scatterplot(
data=transformed_data,
x=0, y=1, hue=hue, marker=".", ax=axes[0,0])
sns.scatterplot(
data=transformed_data,
x=2, y=3, hue=hue, marker=".", ax=axes[0,1])
sns.scatterplot(
data=transformed_data,
x=4, y=5, hue=hue, marker=".", ax=axes[1,0])
sns.scatterplot(
data=transformed_data,
x=6, y=7, hue=hue, marker=".", ax=axes[1,1])
sns.scatterplot(
data=transformed_data,
x=8, y=9, hue=hue, marker=".", ax=axes[2,0])
sns.scatterplot(
data=transformed_data,
x=10, y=11, hue=hue, marker=".", ax=axes[2,1])
else:
(fig, axes) = plt.subplots(1, 2, figsize=(16, 8)) (fig, axes) = plt.subplots(1, 2, figsize=(16, 8))
sns.scatterplot( sns.scatterplot(
data=transformed_data, data=transformed_data,
x=0, y=1, hue=hue, marker=".", ax=axes[0]) x=0, y=1, hue=hue, marker=".", ax=axes[0,0])
sns.scatterplot( sns.scatterplot(
data=transformed_data, data=transformed_data,
x=2, y=3, hue=hue, marker=".", ax=axes[1]) x=2, y=3, hue=hue, marker=".", ax=axes[0,1])
if figs_dir is not None and formats is not None: if figs_dir is not None and formats is not None:
for ext in formats: for ext in formats:
plt.savefig( plt.savefig(
...@@ -940,7 +968,7 @@ def codon_usage_pca( ...@@ -940,7 +968,7 @@ def codon_usage_pca(
if cols_are_codons: if cols_are_codons:
codon_influence_in_components( codon_influence_in_components(
pca.components_, usage_data.columns, pca.components_, usage_data.columns,
figs_dir=figs_dir, formats=formats) figs_dir=figs_dir, more_components=plot_more_components, formats=formats)
return (pca, transformed_data) return (pca, transformed_data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment