Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
libcodonusage
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
libcodonusage
Commits
7b63a9e2
Commit
7b63a9e2
authored
3 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Optional codon influence in PCA.
parent
e6f55702
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libcodonusage/__init__.py
+1
-1
1 addition, 1 deletion
libcodonusage/__init__.py
libcodonusage/libcodonusage.py
+62
-25
62 additions, 25 deletions
libcodonusage/libcodonusage.py
with
63 additions
and
26 deletions
libcodonusage/__init__.py
+
1
−
1
View file @
7b63a9e2
__copyright__
=
"
Copyright (C) 2022 Blaise Li
"
__licence__
=
"
GNU GPLv3
"
__version__
=
"
0.2
5
"
__version__
=
"
0.2
6
"
from
.libcodonusage
import
(
aa2colour
,
aa_usage
,
...
...
This diff is collapsed.
Click to expand it.
libcodonusage/libcodonusage.py
+
62
−
25
View file @
7b63a9e2
...
...
@@ -674,10 +674,62 @@ methionine (M) and tryptophan (W).
all_nan_cols
)
def codon_influence_in_components(
        components, colnames,
        figs_dir=None, formats=None):
    """
    Plot the influence of the columns in the first 4 principal axes of a PCA.

    Each column should correspond to a codon, and will be represented as a
    bar whose colour is based on the last letter of the codon.

    *components* should be a numpy array representing the principal
    axes of a PCA, such as the `components_` attribute of a fitted
    `sklearn.decomposition.PCA` object.
    If it contains fewer than 4 rows, only the available components
    are plotted and the remaining sub-plots are hidden.

    *colnames* should be the names of the columns in the *components*
    array and are expected to match the following pattern:
    <aa>_<codon>, where <aa> is a single-letter code for an amino-acid,
    and <codon> is one of the 3-letter codons for this amino-acid,
    in capital letters among A, T, G and C (i.e. in the DNA alphabet).
    The last letter will be used to set the colour of a bar in the plots.

    *figs_dir* should be a path to a directory that will be used
    to save graphics representing the influence of each data column
    on the first four principal components.

    *formats* should be a list of formats in which the figures should
    be saved, such as "svg" or "png".

    The figure is only saved (as "PCA_components.<ext>" inside *figs_dir*)
    when both *figs_dir* and *formats* are not None. In all cases,
    the figure is displayed and then closed. Nothing is returned.
    """
    render_md(
        "Vizualizing the influence of codons in the first 4 components\n")
    # The colours only depend on *colnames*: compute them once,
    # and before creating the figure, so that an unexpected column name
    # does not leave an unclosed figure behind.
    # colname is supposed to end with the 3-letters codon
    bar_colours = [nuc2colour[colname[-1]] for colname in colnames]
    # TODO: *figsize* could be adapted depending on the number of columns
    (fig, axes) = plt.subplots(4, 1, figsize=(16, 16))
    # There may be fewer than 4 components: do not index past the last one.
    nb_available = len(components)
    for (component, axis) in enumerate(axes[:nb_available]):
        pd.Series(
            components[component],
            index=colnames).plot.bar(
                ax=axis,
                color=bar_colours)
        axis.set_ylabel(f"weight in component {component}")
    # Hide the sub-plots for which there is no component to show.
    for axis in axes[nb_available:]:
        axis.set_visible(False)
    fig.subplots_adjust(hspace=.5)
    if figs_dir is not None and formats is not None:
        for ext in formats:
            # Save *fig* explicitly instead of relying on
            # pyplot's notion of "current figure".
            fig.savefig(
                figs_dir.joinpath(f"PCA_components.{ext}"),
                metadata=fmt_metadata[ext])
    display(fig)
    plt.close(fig)
def
codon_usage_pca
(
usage_data
,
figs_dir
=
None
,
hue
=
"
chrom
"
,
exclude_cols
=
None
,
formats
=
None
):
formats
=
None
,
cols_are_codons
=
True
):
"""
Perform Principal Component Analysis on *usage_data*.
...
...
@@ -696,9 +748,12 @@ def codon_usage_pca(
If *figs_dir* is not None, this path to a directory will be used
to save graphics representing the projection of the observations
in the first four principal components (0 vs. 1 and 2 vs. 3)
as well as graphics representing the influence of each data column
in the first four principal components (0 vs. 1 and 2 vs. 3).
Unless *cols_are_codons* is set to False, there will also be
graphics representing the influence of each data column
on the first four principal components.
*formats* should be a list of formats in which the figures should
be saved, such as
"
svg
"
or
"
png
"
.
...
...
@@ -730,28 +785,10 @@ def codon_usage_pca(
metadata
=
fmt_metadata
[
ext
])
display
(
fig
)
plt
.
close
(
fig
)
render_md
(
"
Vizualizing the influence of codons in the first 4 components
\n
"
)
(
fig
,
axes
)
=
plt
.
subplots
(
4
,
1
,
figsize
=
(
16
,
16
))
for
(
component
,
axis
)
in
enumerate
(
axes
):
pd
.
Series
(
pca
.
components_
[
component
],
index
=
usage_data
.
columns
).
plot
.
bar
(
ax
=
axes
[
component
],
# colname is supposed to end with the 3-letters codon
color
=
[
nuc2colour
[
colname
[
-
1
]]
for
colname
in
usage_data
.
columns
])
axis
.
set_ylabel
(
f
"
weight in component
{
component
}
"
)
# axis.set_xticklabels(axis.get_xticklabels(), rotation=90)
fig
.
subplots_adjust
(
hspace
=
.
5
)
if
figs_dir
is
not
None
and
formats
is
not
None
:
for
ext
in
formats
:
plt
.
savefig
(
figs_dir
.
joinpath
(
f
"
PCA_components.
{
ext
}
"
),
metadata
=
fmt_metadata
[
ext
])
display
(
fig
)
plt
.
close
(
fig
)
if
cols_are_codons
:
codon_influence_in_components
(
pca
.
components_
,
usage_data
.
columns
,
figs_dir
=
figs_dir
,
formats
=
formats
)
return
(
pca
,
transformed_data
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment