Skip to content
Snippets Groups Projects
Commit 0390d53d authored by Blaise Li's avatar Blaise Li
Browse files

Fixed column order, nicer output.

parent cf468c0f
No related branches found
No related tags found
No related merge requests found
__copyright__ = "Copyright (C) 2022 Blaise Li"
__licence__ = "GNU GPLv3"
__version__ = 0.5
__version__ = 0.6
from .libcodonusage import (
aa2colour,
codon2aa,
......@@ -11,6 +11,7 @@ from .libcodonusage import (
make_aa_codon_columns,
make_counts_only,
render_md,
save_counts_table,
sort_counts_by_aa,
violin_usage,
violin_usage_vertical,
......
......@@ -95,7 +95,7 @@ def load_counts_table(table_path, index_col="old_locus_tag"):
there are other columns containing various pieces of information
regarding those genes.
"""
render_md(f"Loading data from {table_path}...\n")
render_md(f"Loading data from [{table_path}]({table_path})...\n")
codon_counts = pd.read_table(table_path, index_col=index_col)
nb_genes = len(codon_counts)
render_md(
......@@ -176,6 +176,14 @@ def detect_fishy_genes(codon_counts):
A table of boolean criteria is returned, with one line per gene.
"""
def display_gene_set(gene_set, max_size=10):
"""
Print out genes in a gene set, depending on their number.
"""
if gene_set and len(gene_set) <= max_size:
print(" ", ", ".join(gene_set))
render_md("### Searching for mis-annotated genes")
(criteria, gene_sets) = compute_criteria(codon_counts)
render_md(
......@@ -188,13 +196,13 @@ def detect_fishy_genes(codon_counts):
the codon is not a valid start codon.
""")
wrong_start = gene_sets["wrong_start"]
render_md(f"There are {len(wrong_start)} genes that start with a `-`:")
display(wrong_start)
render_md(f"There are {len(wrong_start)} genes that start with a `-`.")
display_gene_set(wrong_start)
start_stop = gene_sets["start_stop"]
render_md(
f"There are {len(start_stop)} genes that start with a stop codon:")
display(start_stop)
f"There are {len(start_stop)} genes that start with a stop codon.")
display_gene_set(start_stop)
no_met_start = gene_sets["no_met_start"]
if no_met_start == wrong_start | start_stop:
......@@ -211,45 +219,45 @@ def detect_fishy_genes(codon_counts):
start_upstream = gene_sets["start_upstream"]
render_md(
f"{len(start_upstream)} genes have a possibly ill-defined"
" start position:")
display(start_upstream)
" start position.")
display_gene_set(start_upstream)
start_upstream_met_start = gene_sets["start_upstream_met_start"]
if start_upstream_met_start:
render_md(f"{len(start_upstream_met_start)} of them have a valid 'M'"
" start:")
display(start_upstream_met_start)
" start.")
display_gene_set(start_upstream_met_start)
start_upstream_met_start_nostops = gene_sets[
"start_upstream_met_start_nostops"]
if start_upstream_met_start_nostops:
render_md(f"{len(start_upstream_met_start_nostops)} of them"
" also do not contain any stop:")
display(start_upstream_met_start_nostops)
" also do not contain any stop.")
display_gene_set(start_upstream_met_start_nostops)
render_md("Those genes could probably be kept.")
has_stops_good_met_start = gene_sets["has_stops_good_met_start"]
render_md(
f"{len(has_stops_good_met_start)} genes contain stops"
" but have a well defined start position with 'M'.")
if len(has_stops_good_met_start) <= 10:
display(has_stops_good_met_start)
display_gene_set(has_stops_good_met_start)
good_met_start = gene_sets["good_met_start"]
render_md(
f"{len(good_met_start)} genes have a well defined "
"start position with 'M'.")
render_md(
"""If genes that have stop readthrough are a known phenomenon,
"""
If genes that have stop readthrough are a known phenomenon,
only those among them that do not have a valid start codon
might be excluded.""")
might be excluded.
""")
no_stop_good_met_start = gene_sets["no_stop_good_met_start"]
render_md(
f"{len(no_stop_good_met_start)} genes have a well defined"
" start position with 'M' and contain no stop codon.")
if len(no_stop_good_met_start) <= 10:
display(no_stop_good_met_start)
display_gene_set(no_stop_good_met_start)
return criteria
......@@ -257,14 +265,17 @@ def make_counts_only(counts_table):
"""
Integrate "informative" columns of *counts_table* into the index.
"""
# To ensure a stable order:
ref_info_cols = [
"old_locus_tag", "locus_tag", "length",
"start_codon", "expected_start_aa",
"first_stop", "nb_stops", "start_upstream", "end_downstream"]
# To ensure no info columns are lost:
info_cols = [
counts_table.index.name,
*counts_table.columns.difference(codon2aa)]
assert set(info_cols) == {
"old_locus_tag", "locus_tag", "length",
"start_codon", "expected_start_aa",
"first_stop", "nb_stops", "start_upstream", "end_downstream"}
return counts_table.reset_index().set_index(info_cols)
assert set(info_cols) == set(ref_info_cols)
return counts_table.reset_index().set_index(ref_info_cols)
def make_aa_codon_columns(counts_table):
......@@ -302,6 +313,15 @@ def sort_counts_by_aa(counts_table):
return sorted_counts
def save_counts_table(counts_table, table_path):
"""
Save table *counts_table* at *table_path*.
"""
table_path.parent.mkdir(parents=True, exist_ok=True)
counts_table.to_csv(table_path, sep="\t")
render_md(f"The table was saved at [{table_path}]({table_path}).")
def load_bias_table(table_path, nb_cluster_series=2):
"""
Load a table containing by-amino-acid codon usage biases.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment