diff --git a/libcodonusage/__init__.py b/libcodonusage/__init__.py
index 7c3e67e48f1f874e642a5d600b9779bcd90c9bcb..3eed697de8eaffd4b3af6ad444367b2ab57533b7 100644
--- a/libcodonusage/__init__.py
+++ b/libcodonusage/__init__.py
@@ -1,10 +1,12 @@
 __copyright__ = "Copyright (C) 2022 Blaise Li"
 __licence__ = "GNU GPLv3"
-__version__ = 0.1
+__version__ = 0.3
 from .libcodonusage import (
     aa2colour,
     columns_by_aa,
+    detect_fishy_genes,
     load_bias_table,
+    load_counts_table,
     render_md,
     violin_usage,
     violin_usage_vertical,
diff --git a/libcodonusage/libcodonusage.py b/libcodonusage/libcodonusage.py
index 088e124aed24b01011470505a9b39bcfcaf10246..de8d3c081a7f5302ee70cd03ae3885ac215dcec7 100644
--- a/libcodonusage/libcodonusage.py
+++ b/libcodonusage/libcodonusage.py
@@ -42,6 +42,12 @@ import numpy as np
 import pandas as pd
 # python3 -m pip install cytoolz
 from cytoolz import groupby
+# Python module that facilitates the exploration of tbular data
+# python3 -m pip install plydata
+# from plydata import define, pull, query
+# Python module to vizualize set intersections
+# python3 -m pip install upsetplot
+from upsetplot import from_indicators, UpSet
 
 
 def render_md(md_str):
@@ -76,8 +82,141 @@ with Path(bgraphs.colorschemes._scheme_dir).joinpath(
 # value: hexadecimal html colour code
 aa2colour = {**colscheme["colors"], "*": '#000000'}
 
+
 def load_counts_table(table_path, index_col="old_locus_tag"):
+    """
+    Load a table or pre-computed codon counts at *table_path*.
+
+    The lines correspond to genes.
+    Besides the columns containing the counts for each codon,
+    there are other columns containing various pieces of information
+    regarding those genes.
+    """
+    render_md(f"Loading data from {table_path}...\n")
     codon_counts = pd.read_table(table_path, index_col=index_col)
+    nb_genes = len(codon_counts)
+    render_md(
+        f"""
+        There are {nb_genes} genes in the table.
+
+        The table looks as follows (first 3 lines):
+        """)
+    display(codon_counts.head(3))
+    return codon_counts
+
+
+def detect_fishy_genes(codon_counts):
+    """
+    Run diagnostics about genes included in table *codon_counts*.
+
+    These should help decide whether to exclude some of the genes.
+
+    A table of boolean criteria is returned, with one line per gene.
+    """
+    render_md("### Searching for mis-annotated genes")
+    wrong_start_col = codon_counts["expected_start_aa"] == "-"
+    start_stop_col = codon_counts["expected_start_aa"] == "*"
+    no_met_start_col = codon_counts["expected_start_aa"] != "M"
+    start_upstream_col = codon_counts["start_upstream"]
+    has_stops_col = codon_counts["nb_stops"] > 0
+    start_upstream_met_start_col = start_upstream_col & ~no_met_start_col
+    start_upstream_met_start_nostops_col = (
+        start_upstream_met_start_col & ~has_stops_col)
+    has_stops_good_met_start_col = (
+        has_stops_col & ~no_met_start_col
+        & ~start_upstream_met_start_col)
+    no_stop_good_met_start_col = (
+        ~has_stops_col & ~no_met_start_col
+        & ~start_upstream_met_start_col)
+    criteria = pd.DataFrame({
+        "wrong_start": wrong_start_col,
+        "start_stop": start_stop_col,
+        "no_met_start": no_met_start_col,
+        "start_upstream": start_upstream_col,
+        "has_stops": has_stops_col,
+        "start_upstream_met_start": start_upstream_met_start_col,
+        "start_upstream_met_start_nostops": (
+            start_upstream_met_start_nostops_col),
+        "has_stops_good_met_start": has_stops_good_met_start_col})
+    render_md("Number of genes in potentially fishy categories:\n\n")
+    display(criteria.agg(sum))
+    render_md("Upset plot of the non-empty categories:\n\n")
+    fig = plt.figure()
+    UpSet(
+        from_indicators(*[criteria.columns], data=criteria),
+        show_counts=True).plot(fig=fig)
+    display(fig)
+    plt.close(fig)
+    wrong_start = set(criteria[wrong_start_col].index)
+    start_stop = set(criteria[start_stop_col].index)
+    no_met_start = set(criteria[no_met_start_col].index)
+    start_upstream = set(criteria[start_upstream_col].index)
+    # Not used
+    # has_stops = set(criteria[has_stops_col].index)
+    start_upstream_met_start = set(
+        criteria[start_upstream_met_start_col].index)
+    start_upstream_met_start_nostops = set(
+        criteria[start_upstream_met_start_nostops_col].index)
+    has_stops_good_met_start = set(
+        criteria[has_stops_good_met_start_col].index)
+    no_stop_good_met_start = set(criteria[no_stop_good_met_start_col].index)
+    render_md(
+        """
+        We should avoid genes that are not in-frame. We can likely exclude
+        those that do not start with a valid start codon.
+        Here, this will be seen by looking at the translation of the first
+        codon.
+        If it is "*" (`start_stop`) or "-" (`wrong_start`),
+        the codon is not a valid start codon.
+        """)
+    render_md(f"There are {len(wrong_start)} genes that start with a `-`:")
+    display(wrong_start)
+
+    render_md(
+        f"There are {len(start_stop)} genes that start with a stop codon:")
+    display(start_stop)
+
+    if no_met_start == wrong_start | start_stop:
+        render_md(
+            """
+            Genes not belonging to the `start_stop` or `wrong_start`
+            category all start with a methionine.
+            """)
+    else:
+        raise NotImplementedError(
+            "There are genes that start with something else than 'M', "
+            "'-' or '*'.")
+
+    render_md(
+        f"{len(start_upstream)} genes have a possibly ill-defined"
+        " start position:")
+    display(start_upstream)
+    if start_upstream_met_start:
+        render_md(f"{len(start_upstream_met_start)} of them have a valid 'M'"
+                  " start:")
+        display(start_upstream_met_start)
+
+        if start_upstream_met_start_nostops:
+            render_md(f"{len(start_upstream_met_start_nostops)} of them"
+                      " also do not contain any stop:")
+            display(start_upstream_met_start_nostops)
+            render_md("Those genes could probably be kept.")
+    render_md(
+        f"{len(has_stops_good_met_start)} genes contain stops"
+        " but have a well defined start position with 'M'.")
+    if len(has_stops_good_met_start) <= 10:
+        display(has_stops_good_met_start)
+    render_md(
+        """If genes that have stop readthrough are a known phenomenon,
+        only those among them that do not have a valid start codon
+        might be excluded.""")
+    render_md(
+        f"{len(no_stop_good_met_start)} genes have a well defined"
+        " start position with 'M' and contain no stop codon.")
+    if len(no_stop_good_met_start) <= 10:
+        display(no_stop_good_met_start)
+    return criteria
+
 
 def load_bias_table(table_path, nb_cluster_series=2):
     """
@@ -109,7 +248,7 @@ def load_bias_table(table_path, nb_cluster_series=2):
         # * `cluster_{aa}_full_bias` for each amino-acid
         #   having more than one codon
         index_col=list(range(9 + nb_cluster_series * (len(aa2colour) - 2))),
-        header=[0,1])
+        header=[0, 1])
 
 
 def boxplot_usage(usage_table, ylabel, whiskers="1.5 IQR"):