diff --git a/libreads/__init__.py b/libreads/__init__.py
index 1de9f3913a4ccdef6dc9539c3904cceb7b044c0a..44dda87936945db8ed6c1cd638dd7dacedced890 100644
--- a/libreads/__init__.py
+++ b/libreads/__init__.py
@@ -1,5 +1,7 @@
 __version__ = 0.2
 from .libreads import (
+    ali2fq,
+    bam2fastq,
     make_read_statter,
     plot_base_logos_along_read,
     plot_base_logos_by_size,
diff --git a/libreads/libbamutils.pyx b/libreads/libbamutils.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..cf7795082b1a5bf2291c95a2ef8446731f58ad8f
--- /dev/null
+++ b/libreads/libbamutils.pyx
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# vim: set fileencoding=utf-8 :
+# cython: language_level=3
+"""
+This library contains a cythonized version of utilities to work with BAM files.
+"""
+
+# http://stackoverflow.com/a/23575771/1878788
+# Also works by setting language_level as a comment
+# at the top of the file.
+#from __future__ import print_function
+#from pybedtools import Interval
+from pysam.libchtslib cimport BAM_FREVERSE
+#cdef int BAM_FREVERSE = 16
+#from pysam.libcalignmentfile cimport AlignmentFile
+from pysam.libcalignedsegment cimport AlignedSegment
+from pysam.libcutils cimport array_to_qualitystring
+
+cdef str cali2fq(AlignedSegment ali):
+    """Return a string representing the read aligned in AlignedSegment *ali*."""
+    seq = ali.get_forward_sequence()
+    qual = array_to_qualitystring(ali.get_forward_qualities())
+    name = ali.query_name
+    return f"@{name}\n{seq}\n+\n{qual}\n"
+
+
+def ali2fq(ali):
+    return cali2fq(ali)
+
+
+def bam2fastq(bam_reads,
+              chrom,
+              start,
+              end,
+              strand=None,
+              max_read_len=None):
+    """Generate fastq representations of reads mapped in *bam_reads*.
+    Only reads in the region defined by *chrom*, *start* and *end* are considered.
+    If *strand* is "-" or "+", only the reads from the corresponding strand
+    will be generated.
+    If *max_read_len* is set, only the reads with a length of
+    at most *max_read_len* will be generated.
+    """
+    # The filter checks are hoisted out of the loops
+    # to avoid per-read branching.
+    if strand is None:
+        if max_read_len is None:
+            for ali in bam_reads.fetch(chrom, start, end):
+                yield cali2fq(ali)
+        else:
+            for ali in bam_reads.fetch(chrom, start, end):
+                if ali.query_length <= max_read_len:
+                    yield cali2fq(ali)
+    elif strand == "+":
+        if max_read_len is None:
+            for ali in bam_reads.fetch(chrom, start, end):
+                if ali.flag & BAM_FREVERSE == 0:
+                    yield cali2fq(ali)
+        else:
+            for ali in bam_reads.fetch(chrom, start, end):
+                if ali.flag & BAM_FREVERSE == 0:
+                    if ali.query_length <= max_read_len:
+                        yield cali2fq(ali)
+    elif strand == "-":
+        if max_read_len is None:
+            for ali in bam_reads.fetch(chrom, start, end):
+                if ali.flag & BAM_FREVERSE != 0:
+                    yield cali2fq(ali)
+        else:
+            for ali in bam_reads.fetch(chrom, start, end):
+                if ali.flag & BAM_FREVERSE != 0:
+                    if ali.query_length <= max_read_len:
+                        yield cali2fq(ali)
+    else:
+        raise ValueError("strand can only be \"+\", \"-\" or None.")
+
+
+
+
+
+# def bam2signal(bam_reads,
+#                chrom: str,
+#                start: int,
+#                end: int,
+#                max_read_len: int) -> Mapping[str, Array[int]]:
+#     """
+#     Extract 5'-end signal from `pysam.AlignmentFile` *bam_reads*
+#     for the region defined by *chrom*, *start* and *end*.
+#     """
+#     signal: Mapping[str, Array[int]] = defaultdict(
+#         lambda: np.zeros(end - start + max_read_len, dtype=int))
+#     for read in collapse_and_sort(bam_reads.fetch(chrom, start, end)):
+#         try:
+#             # subtracting start to translate chromosomal coordinates into array indices
+#             signal[read.strand][read.start - start] += 1
+#         except IndexError as err:
+#             # 5' of reverse reads may fall outside the right-most boundary of the island
+#             print(str(err))
+#             print(len(signal[read.strand]))
+#             print(read)
+#     return signal
+
+
+def main():
+    """Example use case."""
+    print("No examples to show here.")
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(main())
diff --git a/libreads/libreads.py b/libreads/libreads.py
index c8c983691e389de7a1296760d14ab4b334fb5288..3aca65f8c992284a72c980a2db9d29056820bfc4 100644
--- a/libreads/libreads.py
+++ b/libreads/libreads.py
@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from mappy import fastx_read
+from .libbamutils import ali2fq, bam2fastq
@@ -35,63 +36,34 @@ def make_read_statter(max_len=None, alphabet=None, processes=1):
     letter2index = dict((letter, idx) for (idx, letter) in enumerate(alphabet))
     # To use as DataFrame index
     base_index = pd.Index(list(alphabet), name="base")
+    # TODO: try to speed this up using a ProcessPoolExecutor?
+    def extract_seq_info(seq):
+        seq_len = len(seq)
+        if seq_len > max_len:
+            # raise ValueError(
+            #     f"{fastx_filename} contains reads of size {seq_len} "
+            #     f"(> 26,154)")
+            raise ValueError(
+                f"The file contains reads of size {seq_len} "
+                f"(> {max_len})")
+        as_letter_indices = [letter2index[letter] for letter in seq]
+        letter_indices_from_start = as_letter_indices[slicer_from_start]
+        letter_indices_from_end = as_letter_indices[slicer_from_end]
+        return (seq_len, seq[0], seq[-1],
+                letter_indices_from_start,
+                letter_indices_from_end,
+                range(seq_len))
-    def fastx2read_stats(fastx_filename):
-        """Return statistics about reads from file *fastx_filename*.
-
-        The statistics consist in a dict of tables with the following keys:
-        * "size": read size distribution as a :class:`pandas.Series`
-        * "composition_at_start"
-        * "composition_at_end"
-        * "composition_from_start"
-        * "composition_from_end"
-
-        Each of these tables is a :class:`pandas.DataFrame`
-        with *max_len* columns.
-        """
-        size = defaultdict(int)
-        first_base = defaultdict(lambda: defaultdict(int))
-        last_base = defaultdict(lambda: defaultdict(int))
-        composition_from_start = np.zeros((len(alphabet), max_len))
-        composition_from_end = np.zeros((len(alphabet), max_len))
-
-        for (_, seq, *_) in fastx_read(fastx_filename):
-            seq_len = len(seq)
-            if seq_len > max_len:
-                raise ValueError(
-                    f"{fastx_filename} contains reads of size {seq_len} "
-                    f"(> 26,154)")
-            for (pos, letter) in enumerate(seq[slicer_from_start]):
-                composition_from_start[letter2index[letter]][pos] += 1
-            for (pos, letter) in enumerate(seq[slicer_from_end]):
-                composition_from_end[letter2index[letter]][pos] += 1
-            size[seq_len] += 1
-            first_base[seq_len][seq[0]] += 1
-            last_base[seq_len][seq[-1]] += 1
-        composition_at_start = pd.DataFrame.from_dict(first_base).reindex(
-            base_index)
-        composition_at_start.columns.name = "read length"
-        composition_at_end = pd.DataFrame.from_dict(last_base).reindex(
-            base_index)
-        composition_at_end.columns.name = "read length"
-        composition_from_start = pd.DataFrame(
-            composition_from_start, index=base_index)
-        composition_from_start.columns = pd.Index(
-            range(1, max_len + 1), name="position_from_start")
-        composition_from_end = pd.DataFrame(
-            composition_from_end, index=base_index)
-        composition_from_end.columns = pd.Index(
-            range(1, max_len + 1), name="position_from_end")
-        return {
-            "total": pd.Series({"total": sum(size.values())}),
-            "size": pd.Series(size).sort_index(),
-            "composition_at_start": composition_at_start.fillna(0),
-            "composition_at_end": composition_at_end.fillna(0),
-            "composition_from_start": composition_from_start.fillna(0),
-            "composition_from_end": composition_from_end.fillna(0)
-        }
+    if processes > 1:
+        executor = ProcessPoolExecutor(max_workers=processes)
+
+        def do_extract_seq_info(seq_generator):
+            yield from executor.map(
+                extract_seq_info, seq_generator, chunksize=10000)
+    else:
+        def do_extract_seq_info(seq_generator):
+            yield from map(extract_seq_info, seq_generator)
-    def fastx2read_stats_parallel(fastx_filename):
+    def fastx2read_stats(fastx_filename):
         """Return statistics about reads from file *fastx_filename*.
 
         The statistics consist in a dict of tables with the following keys:
@@ -109,28 +81,12 @@ def make_read_statter(max_len=None, alphabet=None, processes=1):
         last_base = defaultdict(lambda: defaultdict(int))
         composition_from_start = np.zeros((len(alphabet), max_len))
         composition_from_end = np.zeros((len(alphabet), max_len))
-        executor = ProcessPoolExecutor(max_workers=processes)
-        # TODO: try to speed this using ProcessPoolExecutor?
-        def extract_seq_info(seq):
-            seq_len = len(seq)
-            if seq_len > max_len:
-                raise ValueError(
-                    f"{fastx_filename} contains reads of size {seq_len} "
-                    f"(> 26,154)")
-            as_letter_indices = [letter2index[letter] for letter in seq]
-            letter_indices_from_start = as_letter_indices[slicer_from_start]
-            letter_indices_from_end = as_letter_indices[slicer_from_end]
-            return (seq_len, seq[0], seq[-1],
-                    letter_indices_from_start,
-                    letter_indices_from_end,
-                    range(seq_len))
         seq_generator = (seq for (_, seq, *_) in fastx_read(fastx_filename))
         for (seq_len, start_letter, end_letter,
              letter_indices_from_start,
              letter_indices_from_end,
-             positions) in executor.map(
-                 extract_seq_info, seq_generator, chunksize=10000):
+             positions) in do_extract_seq_info(seq_generator):
             composition_from_start[letter_indices_from_start, positions] += 1
             composition_from_end[letter_indices_from_end, positions] += 1
             size[seq_len] += 1
@@ -158,8 +114,7 @@ def make_read_statter(max_len=None, alphabet=None, processes=1):
             "composition_from_start": composition_from_start.fillna(0),
             "composition_from_end": composition_from_end.fillna(0)
         }
-    if processes > 1:
-        return fastx2read_stats_parallel
+    return fastx2read_stats
@@ -265,7 +220,8 @@ def plot_logo(data, axis, xlabel=None, ylabel=None, legend=None):
 
 
 def plot_base_logos_by_size(datas,
-                            data_labels=None, fig_path=None, ylabel=None):
+                            data_labels=None, fig_path=None, ylabel=None,
+                            fig=None, axes=None):
     """
     Plot bar graphs representing read composition using a representation
     where bar heights are proportional to the information content.
@@ -283,7 +239,14 @@ def plot_base_logos_by_size(datas,
     """
     if data_labels is None:
         data_labels = [str(i) for i in range(len(datas))]
-    (fig, axes, data_labels) = vstack_fig_setup(datas, data_labels)
+    if axes is None:
+        (fig, axes, data_labels) = vstack_fig_setup(datas, data_labels)
+        legend_holder = fig
+    else:
+        if fig is None:
+            legend_holder = axes[0]
+        else:
+            legend_holder = fig
     for (nucl_table, axis, label) in zip(datas, axes, data_labels):
         (handles, labels) = plot_logo(
             nucl_table, axis=axis, legend=False, xlabel=False, ylabel=False)
@@ -296,7 +259,7 @@ def plot_base_logos_by_size(datas,
         plt.ylabel(ylabel, labelpad=30)
         plt.title("")
     label2handle = dict(islice(zip(labels, handles), 5))
-    fig.legend(
+    legend_holder.legend(
         handles=[label2handle[nucl] for nucl in ["A", "C", "G", "T", "N"]],
         labels=["A", "C", "G", "T", "N"],
         loc=7)
@@ -308,7 +271,8 @@ def plot_base_logos_by_size(datas,
 
 def plot_base_logos_along_read(datas,
                                data_labels=None, fig_path=None,
-                               xlabel=None, ylabel=None):
+                               xlabel=None, ylabel=None,
+                               fig=None, axes=None):
     """
     Plot bar graphs representing read composition using a representation
     where bar heights are proportional to the information content.
@@ -326,7 +290,14 @@ def plot_base_logos_along_read(datas,
     """
     if data_labels is None:
         data_labels = [str(i) for i in range(len(datas))]
-    (fig, axes, data_labels) = vstack_fig_setup(datas, data_labels)
+    if axes is None:
+        (fig, axes, data_labels) = vstack_fig_setup(datas, data_labels)
+        legend_holder = fig
+    else:
+        if fig is None:
+            legend_holder = axes[0]
+        else:
+            legend_holder = fig
     for (nucl_table, axis, label) in zip(datas, axes, data_labels):
         (handles, labels) = plot_logo(
             nucl_table, axis=axis, legend=False, xlabel=False, ylabel=False)
@@ -341,7 +312,7 @@ def plot_base_logos_along_read(datas,
         plt.ylabel(ylabel, labelpad=30)
         plt.title("")
     label2handle = dict(islice(zip(labels, handles), 5))
-    fig.legend(
+    legend_holder.legend(
         handles=[label2handle[nucl] for nucl in "ACGTN"],
         labels="ACGTN",
         loc=7)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..20a5ff9a13cf4909c03fb38923c432da191c289a
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[build-system]
+requires = [
+    "setuptools",
+    "Cython",
+    "pysam"
+]
+build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
index fe2f974ce81c0d92bacd058447bea0c3a2c02b61..6e074c4927b8fe0137dd9ccc4d6ef3c5ca3c9bb8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,7 @@
 from setuptools import setup, find_packages
 #from Cython.Build import cythonize
+from setuptools.extension import Extension
+from pysam import get_include as pysam_get_include
 
 
 name = "libreads"
@@ -10,6 +12,13 @@ for line in open("%s/__init__.py" % name):
     exec(line.strip())
 
 
+# https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build
+extensions = [
+    Extension(
+        "libreads.libbamutils", ["libreads/libbamutils.pyx"],
+        include_dirs=pysam_get_include()),
+    ]
+
 setup(
     name=name,
     version=__version__,
@@ -19,7 +28,14 @@ setup(
     license="MIT",
     packages=find_packages(),
     # scripts=["scripts/"],
+    ext_modules=extensions,
+    setup_requires=[
+        "wheel",
+        "cython",
+        #"pysam",
+    ],
     install_requires=[
+        "pysam",
         #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git",
         "matplotlib",
        #"networkx",
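
Usage sketch (not part of the patch, untested): driving the new `bam2fastq` generator from `pysam`. The BAM file name, region coordinates and length cutoff are placeholders; `fetch()` requires a coordinate-sorted and indexed BAM.

```python
import pysam

from libreads import bam2fastq

# Placeholder inputs; any indexed, coordinate-sorted BAM works here.
with pysam.AlignmentFile("example.bam", "rb") as bam_reads:
    with open("region_reads.fastq", "w") as fastq_out:
        # Keep only forward-strand reads of at most 50 nt in chr1:10,000-20,000.
        for record in bam2fastq(bam_reads, "chr1", 10000, 20000,
                                strand="+", max_read_len=50):
            fastq_out.write(record)
```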
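A corresponding sketch for the reworked statter factory: `make_read_statter` now builds `extract_seq_info` once and, when `processes > 1`, dispatches it through a shared `ProcessPoolExecutor`. The input file name is invented; `mappy.fastx_read` also accepts gzipped fastq.

```python
from libreads import make_read_statter

# processes > 1 would switch to the executor-backed path added above.
read_stats = make_read_statter(max_len=50)
stats = read_stats("reads.fastq.gz")
print(stats["total"])  # total number of reads
print(stats["size"])   # read size distribution as a pandas Series
```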
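Finally, a sketch of the new `fig`/`axes` escape hatch in the plotting functions: passing `axes` skips `vstack_fig_setup`, and the legend lands on the provided figure (or on `axes[0]` when no figure is given). The `stats_wt`/`stats_mut` tables and labels are invented.

```python
import matplotlib.pyplot as plt

from libreads import plot_base_logos_along_read

# Caller-managed layout: one row per dataset, as vstack_fig_setup would produce.
(fig, axes) = plt.subplots(2, 1, sharex=True)
plot_base_logos_along_read(
    [stats_wt["composition_from_start"], stats_mut["composition_from_start"]],
    data_labels=["WT", "mutant"],
    xlabel="position from 5' end", ylabel="information (bits)",
    fig=fig, axes=axes)
fig.savefig("logos_along_read.svg")
```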