Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
libcodonusage
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
libcodonusage
Commits
e486cb54
Commit
e486cb54
authored
3 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Computation of cluster centroids.
parent
3b410576
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
libcodonusage/__init__.py
+2
-1
2 additions, 1 deletion
libcodonusage/__init__.py
libcodonusage/libcodonusage.py
+68
-2
68 additions, 2 deletions
libcodonusage/libcodonusage.py
with
70 additions
and
3 deletions
libcodonusage/__init__.py
+
2
−
1
View file @
e486cb54
__copyright__
=
"
Copyright (C) 2022 Blaise Li
"
__licence__
=
"
GNU GPLv3
"
__version__
=
"
0.1
2
"
__version__
=
"
0.1
3
"
from
.libcodonusage
import
(
aa2colour
,
aa_usage
,
by_aa_codon_usage
,
centroid_usage
,
codon2aa
,
columns_by_aa
,
detect_fishy_genes
,
...
...
This diff is collapsed.
Click to expand it.
libcodonusage/libcodonusage.py
+
68
−
2
View file @
e486cb54
...
...
@@ -17,6 +17,8 @@
import
json
from
operator
import
itemgetter
from
pathlib
import
Path
# python3 -m pip install cytoolz
from
cytoolz
import
groupby
# To render mardown in a Jupyter notebook on gitlab
from
IPython.core.display
import
display
,
HTML
# python3 -m pip install markdown
...
...
@@ -40,8 +42,6 @@ import numpy as np
# python3 -m pip install pandas
# See https://pandas.pydata.org/pandas-docs/stable/index.html
import
pandas
as
pd
# python3 -m pip install cytoolz
from
cytoolz
import
groupby
# Python module that facilitates the exploration of tbular data
# python3 -m pip install plydata
# from plydata import define, pull, query
...
...
@@ -569,6 +569,72 @@ methionine (M) and tryptophan (W).
all_nan_cols
)
def
centroid_usage
(
codon_counts
,
all_nan_cols
):
"""
Define
"
centroids
"
for gene clusters, one per codon in *codon_counts*.
The standardized usage biases of these centroids are computed so that
the codon proportions for a given amino-acid are the same as the global
proportions, except for the codons corresponding to a certain amino-acid.
For each amino-acid, there is one centroid per codon, where the
proportion for this codon is set to 1.0, and 0.0 for the other codons.
"""
summed_by_aa
=
codon_counts
.
groupby
(
level
=
0
,
axis
=
1
).
sum
()
global_usage
=
codon_counts
.
sum
(
axis
=
0
)
global_summed_by_aa
=
global_usage
.
groupby
(
level
=
0
).
sum
()
global_proportions_by_aa
=
global_usage
.
div
(
global_summed_by_aa
)
codon_proportions_by_aa
=
codon_counts
.
div
(
summed_by_aa
)
codon_usage_biases_by_aa
=
codon_proportions_by_aa
.
sub
(
global_proportions_by_aa
)
# _columns_by_aa may be different from columns_by_aa
# Groups of column headers for each amino-acid
# key: aa, value: list of (aa, codon) pairs
_columns_by_aa
=
groupby
(
itemgetter
(
0
),
codon_proportions_by_aa
.
columns
)
def
generate_centroid_proportions
():
for
(
aa1
,
columns
)
in
_columns_by_aa
.
items
():
for
(
_
,
codon1
)
in
columns
:
proportions
=
global_proportions_by_aa
.
copy
()
for
(
aa2
,
codon2
)
in
columns
:
proportions
[(
aa2
,
codon2
)]
=
0.0
proportions
[(
aa1
,
codon1
)]
=
1.0
proportions
.
name
=
(
f
"
{
aa1
}
_
{
codon1
}
"
,
f
"
{
aa1
}
_
{
codon1
}
"
,
*
np
.
full
(
codon_proportions_by_aa
.
index
.
nlevels
-
2
,
np
.
nan
))
yield
proportions
render_md
(
"""
For each codon, a
"
centroid
"
(future center of a cluster of genes)
is defined where the proportion of this codon is set to 1.0.
For the other codons corresponding to the same amino-acid, the proportion
is therefore set to 0.0.
For codons corresponding to other amino-acids, the proportions are set to
the proportions observed in the global data.
"""
)
centroids_proportions_by_aa
=
pd
.
DataFrame
(
generate_centroid_proportions
())
centroids_proportions_by_aa
.
index
.
rename
(
codon_proportions_by_aa
.
index
.
names
,
inplace
=
True
)
render_md
(
"""
The biases for the centroids are then computed by subtracting to those
proportions the proportions in the global data, and the biases are
standardized by dividing by the standard deviation of codon usage biases
in the data.
"""
)
# Compute biases using the same global proportions as the real data
# and standardize by the same standard deviations as the real data
centroids_scub_by_aa
=
centroids_proportions_by_aa
.
sub
(
global_proportions_by_aa
).
div
(
codon_usage_biases_by_aa
.
std
()).
drop
(
columns
=
all_nan_cols
)
# centroids_scub_by_aa.head(3)
return
centroids_scub_by_aa
def
load_bias_table
(
table_path
,
nb_info_cols
=
9
,
nb_cluster_series
=
2
):
"""
Load a table containing by-amino-acid codon usage biases.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment