From 53fb043d42506fbe415b48eac7b9843d18288000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bertrand=20N=C3=A9ron?= <bneron@pasteur.fr>
Date: Thu, 17 Oct 2024 15:53:48 +0200
Subject: [PATCH] create seaborn TP on happyness 2016 data set

---
 notebooks/Practicals/seaborn_TP.ipynb         | 506 ++++++++++++++----
 .../Solutions/seaborn_TP_solutions.ipynb      | 198 +------
 2 files changed, 434 insertions(+), 270 deletions(-)

diff --git a/notebooks/Practicals/seaborn_TP.ipynb b/notebooks/Practicals/seaborn_TP.ipynb
index 47d0a4c..87eb3f2 100644
--- a/notebooks/Practicals/seaborn_TP.ipynb
+++ b/notebooks/Practicals/seaborn_TP.ipynb
@@ -8,13 +8,13 @@
     "# <center><b>Hands-on</b></center>\n",
     "\n",
     "<div style=\"text-align:center\">\n",
-    "    <img src=\"images/seaborn.png\" width=\"600px\">\n",
+    "    <img src=\"../images/seaborn.png\" width=\"600px\">\n",
     "    <div>\n",
-    "       Bertrand Néron, François Laurent, Etienne Kornobis\n",
+    "       Bertrand Néron, François Laurent, Etienne Kornobis, Vincent Guillemot\n",
     "       <br />\n",
     "       <a src=\" https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistiqucs HUB</a>\n",
     "       <br />\n",
-    "       © Institut Pasteur, 2021\n",
+    "       © Institut Pasteur, 2024\n",
     "    </div>    \n",
     "</div>"
    ]
@@ -24,379 +24,695 @@
    "id": "compliant-basis",
    "metadata": {},
    "source": [
-    "Practice your graphing skills using data from milieu intérieur in `data/mi.csv`:"
+    "Practice your graphing skills using the [World Happiness Report 2016](https://www.kaggle.com/datasets/unsdsn/world-happiness?select=2016.csv) data set.\n",
+    "\n",
+    "(The data are already available in the `data` directory as `happiness_2016.csv`.)"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "departmental-exhibition",
+   "id": "3778963b-3bae-486d-8db7-30f23eb239ac",
    "metadata": {},
    "source": [
-    "- Do a boxplot showing the differences in temperature between females and males:"
+    "## Import the data and have a look at them\n",
+    "\n",
+    "1. import the pandas and seaborn modules\n",
+    "2. import the data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "98e904b6-6e90-4c74-a463-2339d3961250",
+   "execution_count": 2,
+   "id": "minor-doctrine",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "skilled-daniel",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "portuguese-worse",
+   "id": "1360a875-cce6-4a2f-b19f-faf1b64230cc",
    "metadata": {},
    "source": [
-    "- Using a histogram and continuous probability density curve, display the distribution of age in the dataset"
+    "3. have a look at them"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "55756807-e1fb-4fb5-878c-5e46acea7a11",
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 4,
+   "id": "f8729a5b-314d-42fc-b130-783ca5e2076a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(157, 13)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "brutal-manufacturer",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Country</th>\n",
+       "      <th>Region</th>\n",
+       "      <th>Happiness Rank</th>\n",
+       "      <th>Happiness Score</th>\n",
+       "      <th>Lower Confidence Interval</th>\n",
+       "      <th>Upper Confidence Interval</th>\n",
+       "      <th>Economy (GDP per Capita)</th>\n",
+       "      <th>Family</th>\n",
+       "      <th>Health (Life Expectancy)</th>\n",
+       "      <th>Freedom</th>\n",
+       "      <th>Trust (Government Corruption)</th>\n",
+       "      <th>Generosity</th>\n",
+       "      <th>Dystopia Residual</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Denmark</td>\n",
+       "      <td>Western Europe</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7.526</td>\n",
+       "      <td>7.460</td>\n",
+       "      <td>7.592</td>\n",
+       "      <td>1.44178</td>\n",
+       "      <td>1.16374</td>\n",
+       "      <td>0.79504</td>\n",
+       "      <td>0.57941</td>\n",
+       "      <td>0.44453</td>\n",
+       "      <td>0.36171</td>\n",
+       "      <td>2.73939</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Switzerland</td>\n",
+       "      <td>Western Europe</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7.509</td>\n",
+       "      <td>7.428</td>\n",
+       "      <td>7.590</td>\n",
+       "      <td>1.52733</td>\n",
+       "      <td>1.14524</td>\n",
+       "      <td>0.86303</td>\n",
+       "      <td>0.58557</td>\n",
+       "      <td>0.41203</td>\n",
+       "      <td>0.28083</td>\n",
+       "      <td>2.69463</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Iceland</td>\n",
+       "      <td>Western Europe</td>\n",
+       "      <td>3</td>\n",
+       "      <td>7.501</td>\n",
+       "      <td>7.333</td>\n",
+       "      <td>7.669</td>\n",
+       "      <td>1.42666</td>\n",
+       "      <td>1.18326</td>\n",
+       "      <td>0.86733</td>\n",
+       "      <td>0.56624</td>\n",
+       "      <td>0.14975</td>\n",
+       "      <td>0.47678</td>\n",
+       "      <td>2.83137</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Norway</td>\n",
+       "      <td>Western Europe</td>\n",
+       "      <td>4</td>\n",
+       "      <td>7.498</td>\n",
+       "      <td>7.421</td>\n",
+       "      <td>7.575</td>\n",
+       "      <td>1.57744</td>\n",
+       "      <td>1.12690</td>\n",
+       "      <td>0.79579</td>\n",
+       "      <td>0.59609</td>\n",
+       "      <td>0.35776</td>\n",
+       "      <td>0.37895</td>\n",
+       "      <td>2.66465</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Finland</td>\n",
+       "      <td>Western Europe</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7.413</td>\n",
+       "      <td>7.351</td>\n",
+       "      <td>7.475</td>\n",
+       "      <td>1.40598</td>\n",
+       "      <td>1.13464</td>\n",
+       "      <td>0.81091</td>\n",
+       "      <td>0.57104</td>\n",
+       "      <td>0.41004</td>\n",
+       "      <td>0.25492</td>\n",
+       "      <td>2.82596</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Country          Region  Happiness Rank  Happiness Score  \\\n",
+       "0      Denmark  Western Europe               1            7.526   \n",
+       "1  Switzerland  Western Europe               2            7.509   \n",
+       "2      Iceland  Western Europe               3            7.501   \n",
+       "3       Norway  Western Europe               4            7.498   \n",
+       "4      Finland  Western Europe               5            7.413   \n",
+       "\n",
+       "   Lower Confidence Interval  Upper Confidence Interval  \\\n",
+       "0                      7.460                      7.592   \n",
+       "1                      7.428                      7.590   \n",
+       "2                      7.333                      7.669   \n",
+       "3                      7.421                      7.575   \n",
+       "4                      7.351                      7.475   \n",
+       "\n",
+       "   Economy (GDP per Capita)   Family  Health (Life Expectancy)  Freedom  \\\n",
+       "0                   1.44178  1.16374                   0.79504  0.57941   \n",
+       "1                   1.52733  1.14524                   0.86303  0.58557   \n",
+       "2                   1.42666  1.18326                   0.86733  0.56624   \n",
+       "3                   1.57744  1.12690                   0.79579  0.59609   \n",
+       "4                   1.40598  1.13464                   0.81091  0.57104   \n",
+       "\n",
+       "   Trust (Government Corruption)  Generosity  Dystopia Residual  \n",
+       "0                        0.44453     0.36171            2.73939  \n",
+       "1                        0.41203     0.28083            2.69463  \n",
+       "2                        0.14975     0.47678            2.83137  \n",
+       "3                        0.35776     0.37895            2.66465  \n",
+       "4                        0.41004     0.25492            2.82596  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "prepared-stephen",
+   "id": "departmental-exhibition",
    "metadata": {},
    "source": [
-    "- Use a barplot to show the count of vaccinated for yellow fever (see the documentation for a countplot)"
+    "## Do a boxplot showing the differences in `Happiness Score` between regions (`Region`):"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1425046c-a058-45fe-95b5-5eca6ebbd33a",
+   "id": "saved-identity",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "immediate-method",
+   "id": "portuguese-worse",
    "metadata": {},
    "source": [
-    "- Plot the distribution of age for the people vaccinated for the flu"
+    "## Using a histogram and continuous probability density curve, display the distribution of `Freedom` in the dataset"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d567194c-3698-44c9-b5f8-b8a3d3493b0c",
+   "id": "continuous-indian",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
-   "cell_type": "markdown",
-   "id": "temporal-synthesis",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "understanding-vegetarian",
    "metadata": {},
-   "source": [
-    "- Feel free to explore more of [seaborn](https://seaborn.pydata.org/examples/index.html) !"
-   ]
+   "outputs": [],
+   "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "db56d49a-4770-4f9e-af6b-78960574d338",
+   "id": "prepared-stephen",
    "metadata": {},
    "source": [
-    "# Exploring count matrices from RNA-seq data"
+    "- Use a barplot to show the number of countries per `Region` (see the documentation for a countplot)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "worldwide-communication",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
-   "id": "5377668b-dea5-4c20-8249-5266f98774eb",
+   "id": "4e1d16f2-57d8-4f0e-9a69-e9ab193a3ebc",
    "metadata": {},
    "source": [
-    "<img src=\"images/rnaseq.png\" style=\"margin:0 auto;width:800px\">"
+    "As you can see, the labels overlap each other and are not readable.\n",
+    "\n",
+    "One possibility is to rotate the x-axis labels. In this case it is better to provide the labels explicitly."
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "ebf1606b-0b21-4821-a899-551ec33c977e",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64ee74bb-5f37-485f-8c03-d06ac14d3010",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "- Import the count_matrix tsv file from the data folder"
+    "# extract the regions from the data; we will use them as labels for the figures below"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "eb53a1f5-9ea7-491e-bcfa-820cb1663af5",
+   "id": "eb7c96ac-585a-4787-a2ee-dec3d04790ca",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "c80d9947-9ccf-4499-a1c2-9194377cd054",
+   "id": "e2eac27c-2de9-4942-82b9-294318ec5fd4",
    "metadata": {},
    "source": [
-    "- Simplify the dataframe to only have the \"Geneid\", \"WTx\" and \"Cx\" columns"
+    "## On the same data (`Happiness Score` and `Region`), do a boxplot and a swarmplot to display the structure of the data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "56e90032-75ce-47b5-9cd3-95219cd7b26e",
+   "id": "b0fd6058-65b0-4f9b-bc59-dce0193f1580",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1fe079b4-013a-4d48-ac31-9daad4b4673e",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "eb65b51f-f689-4a66-b47c-e79f0e9eba52",
+   "id": "immediate-method",
    "metadata": {},
    "source": [
-    "- Format properly your DataFrame to be able to use  https://seaborn.pydata.org/generated/seaborn.clustermap.html to realize a heatmap."
+    "## Plot the distribution of `Happiness Score` for the people living in `Western Europe`"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9b422fcb-7cc1-4766-92e3-276742381ae6",
+   "id": "academic-measure",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "f8d6188e-3a37-4ba5-b377-a11696054e9c",
+   "id": "fd9789db-9bab-478a-bcec-1c8b6775cf20",
    "metadata": {},
    "source": [
-    "- Explore the clustermap documentation to have a more visual heatmap by standardizing the data within genes."
+    "## Plot `Health (Life Expectancy)` vs `Happiness Score`, color the dots according to the region, and specify a size for the figure (9 x 7 inches)\n",
+    "\n",
+    "1. import pyplot\n",
+    "2. then create a new figure and axis at the right size\n",
+    "3. create the plot"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "06be3f98-2167-44ac-9318-955286d77903",
+   "id": "d42f72f1-1d01-496e-89a1-68391ffa4281",
    "metadata": {},
    "outputs": [],
    "source": []
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52ae8376-3c66-4ca6-86a9-f9ae9f56076f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "temporal-synthesis",
+   "metadata": {},
+   "source": [
+    "- Feel free to explore more of [seaborn](https://seaborn.pydata.org/examples/index.html) !"
+   ]
+  },
   {
    "cell_type": "markdown",
-   "id": "2e61a207-223a-4c01-88ea-76b1b8c3a0b9",
+   "id": "3063abf7-2251-48eb-b371-6c5b70b45fe7",
    "metadata": {},
    "source": [
-    "- Reformat the counts_df dataframe to have genes in columns and samples in rows.\n",
-    "- Add a \"group\" column defining the grouping of the samples:\n",
-    "    - \"WTx\" samples will be from the \"WT\" group.\n",
-    "    - \"Cx\" samples will be from the \"C\" group."
+    "## Do a barplot of the Happiness Score for each Region"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "eea3f521-6960-44ab-ac0b-fcf5a002237f",
+   "id": "85dd0df6-74e7-43be-9a7c-eb922a06601b",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "9a88ecb1-9ed3-4160-91ee-24a30e994b71",
+   "id": "3ee5741a-64f4-4690-963b-1f7e729398bf",
    "metadata": {},
    "source": [
-    "- Display a barplot showing the mean expression for each group for a particular gene (for example \"gene-LEPBI_RS00065\")."
+    "## From this point on, we will focus on the Regions\n",
+    "\n",
+    "### Clean our dataset: remove the irrelevant columns"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cf74e85e-eef3-4023-bb88-5a864cf3c3f9",
+   "id": "0344b730-1535-47fb-82f5-07003fd223f9",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "99e2455a-cb7d-44d5-a4a0-2cf272c814ab",
+   "id": "36e449d1-0add-4ebc-8903-d535219ce423",
    "metadata": {},
    "source": [
-    "- Try plotting a swarmplot on top of the previous barplot:"
+    "1. keep only columns 'Region', 'Happiness Score', 'Economy (GDP per Capita)', 'Family', 'Freedom','Trust (Government Corruption)', 'Generosity'\n",
+    "2. set the index to the Region\n",
+    "3. have a look at your new data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7cf225f9-aea7-4cd9-ac90-a99592799527",
+   "id": "bdf897c4-b8f3-4dff-b9c0-0ad47b25ecc0",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "d200d375-362e-4c1d-a88e-130b094e6feb",
+   "id": "e1ae03ac-ac7c-436d-987d-113e9cca3eec",
    "metadata": {},
    "source": [
-    "- Now plot the same data using a boxplot. Can you see the problem of displaying boxplots for this kind of data ?"
+    "## Aggregate the new data region by region: compute the mean over the countries of each Region and use it as the value for that Region"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e4daf00e-9a2c-4ec4-9d26-aa18aae5d82d",
+   "id": "3fc3ea89-a448-4e7b-abfb-3fa92cffc5f7",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "2e1cabe0-aab7-4f0e-888e-81aae7d5df8d",
+   "id": "97cb188c-3e50-4492-961f-cadea3611aaa",
    "metadata": {},
    "source": [
-    "- Compute the median of each genes by groups:"
+    "## Do a hierarchically-clustered heatmap "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6ffd0f59-0fd7-41b9-a87a-c6e1a74145e8",
+   "id": "9aa21ed4-e9b2-4eb3-a693-c59ceb513552",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "308cc10b-6727-4bc5-b05d-4777037e252e",
+   "id": "88d27d29-e3b8-43d7-8324-25e50c247872",
    "metadata": {},
    "source": [
-    "We are going now to add extra annotations to this median table in order to identify genes of interest.\n",
-    "- Import the annotation.csv table from the data folder: "
+    "Check the data."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9be6ee5b-d497-47fa-8ac5-cf5514fd52c0",
+   "id": "0128f575-0b2a-4cbc-8f6e-8b7e22d81254",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "50fa81a7-3f34-4160-ad2d-f77d21be9ac0",
+   "id": "f9b39ab8-0051-4840-9e87-fe2bcb8ca07a",
    "metadata": {},
    "source": [
-    "Annotations in this table are available for many types of loci (the \"genetic_type\" column), but here we will focus on the \"gene\" genetic_type. \n",
-    "- Filter the annotation dataframe to have only \"gene\" as \"genetic_type\"."
+    "The data are not in the same range, so it would be better to standardize them before doing the clustering."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f9a8bcf7-0bcc-43e8-828a-ec204658e528",
+   "id": "ff4beb57-b357-47a3-b7bd-877e05229b6b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e19f9472-cb9b-434b-8689-2bf09d49b902",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "f8a4e744-e7e2-43b6-b3d4-e59feb40d3ff",
+   "id": "d64a0377-339b-4fe7-beb4-a32e4a4e0113",
    "metadata": {},
    "source": [
-    "- Concatenate the dataframe with median by group and the annotation dataframe together:"
+    "It's possible to do that directly in seaborn with the `z_score` option (https://seaborn.pydata.org/generated/seaborn.clustermap.html)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "afd8467a-33e1-4b9e-8f6d-b2229099c874",
+   "id": "3b439517-5007-4fbb-828d-265f9835594f",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "af9f8e1f-5f8b-4152-b08a-44e957f13cec",
+   "id": "f8d6188e-3a37-4ba5-b377-a11696054e9c",
+   "metadata": {},
+   "source": [
+    "- Explore the clustermap documentation to have a more visual heatmap by standardizing the data within each variable (column)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a2627322-e6a5-422f-8a69-b89dbd4b777e",
+   "metadata": {},
+   "source": [
+    "## Create a function which produces a single image with four different plots of your choice, and save it to a PDF file.\n",
+    "\n",
+    "The result should look like the image below."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4121ff3d-6814-493e-a505-357ad81b0d28",
    "metadata": {},
    "source": [
-    "- Calculate an estimate of the gene expression fold change for each gene (by dividing the C median expressions by WT median expressions).\n",
-    "- Add it as a \"FoldChange\" column to the previous dataframe."
+    "<img src=\"../images/multiple_figure.png\" width=\"50%\" />"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bb617d00-2c2d-45cc-ace0-3656dc999b17",
+   "id": "a322c866-9232-4fae-bcee-9a635e3fd70b",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
-   "cell_type": "markdown",
-   "id": "d70eb26b-0a26-4bbc-af03-ba8781b09fb5",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "044022d1-741d-4a07-ba7f-c1f863cca138",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "- Use a barplot to display fold changes and using the new gene annotation (The \"Name\" column)"
+    "def expression_graph():\n",
+    "    ...\n",
+    "    return fig\n",
+    "    "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4dd4cbee-547f-43f1-9ed7-173f3040b8d5",
+   "id": "c33bfc78-7480-4327-93a0-f8aaca0d3614",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "my_fig = expression_graph()\n",
+    "..."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d05aba4-3c85-4cd9-85f3-5296b19308fb",
+   "metadata": {},
+   "source": [
+    "# Extras"
+   ]
   },
   {
    "cell_type": "markdown",
-   "id": "34a26492-7c6b-4a07-a4de-67ec8f693cdc",
+   "id": "66d6668e-683f-462e-a72f-28bdda8736f2",
    "metadata": {},
    "source": [
-    "- By calculating the length of each gene and using a visualisation, does gene expression appears correlated with gene length ?"
+    "- Using ipywidgets, write a function that displays a barplot of `Happiness Score` by country for a region selected by the user (using a Dropdown widget)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "042bd87e-d2dc-4544-a771-51d80c565d0f",
+   "metadata": {},
+   "source": [
+    "Import the needed modules:\n",
+    "- `widgets` and `interact` from the `ipywidgets` package\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6f35b696-0807-4df4-9310-cb9197e7bf85",
+   "id": "64ebeca1-1332-4585-9e5c-c1b66f82be71",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "from ipywidgets import widgets\n",
+    "from ipywidgets import interact"
+   ]
   },
   {
    "cell_type": "markdown",
-   "id": "a2627322-e6a5-422f-8a69-b89dbd4b777e",
+   "id": "277264e6-a173-40c5-b71e-4cd551a7fa99",
    "metadata": {},
    "source": [
-    "- Create a function which produce a single image with four different plots of your choice and save it to pdf file."
+    "Create a DataFrame containing the regions (without duplicate values)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "70e001a1-2848-4fb7-9f33-7beb4475e0fc",
+   "id": "ebf7fde9-b4a1-4e8a-86ab-86ad8b1b533a",
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
-   "id": "0d05aba4-3c85-4cd9-85f3-5296b19308fb",
+   "id": "f34e5053-ccf5-4a67-96db-7457fe16bbd6",
    "metadata": {},
    "source": [
-    "# Extras"
+    "1. Use this DataFrame to populate your dropdown list\n",
+    "2. Use the region selected in the dropdown list as the parameter of your function\n",
+    "3. Select from the whole DataFrame the data corresponding to this region\n",
+    "4. Display the barplot"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "66d6668e-683f-462e-a72f-28bdda8736f2",
+   "id": "feba608f-2ecb-41ae-b04a-12f075fd644b",
+   "metadata": {},
+   "source": [
+    "Below is the code skeleton of your function:\n",
+    "\n",
+    "```python\n",
+    "@interact(region=widgets.Dropdown(options=regions))\n",
+    "def plot_counts(region):\n",
+    "    data = ha_df.loc[ha_df['Region'] == region]\n",
+    "    ax = sns.barplot(data= ....\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb746fda-36cc-4c35-92d8-257a489fb278",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f4bd68e-eb26-46f8-a00f-86f9d0570580",
    "metadata": {},
    "source": [
-    "- Using ipywidget, make a function to display barplot of gene expression by groups with the gene being selected by the user (using a Dropdown widget for example)."
+    "You can customize your figure like any classical seaborn/matplotlib figure,\n",
+    "\n",
+    "for instance to display the value above each bar."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e587f202-7ca4-43fb-ac3c-015c740c69d2",
+   "id": "7bcee7c5-f1c2-4035-9b7c-e68e1d73a932",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d78b7b86-ecaa-4d27-80ca-2d3e46c2aca3",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -404,9 +720,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:dev]",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "conda-env-dev-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -418,7 +734,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.4"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/Solutions/seaborn_TP_solutions.ipynb b/notebooks/Solutions/seaborn_TP_solutions.ipynb
index d96cdf7..7a9e21b 100644
--- a/notebooks/Solutions/seaborn_TP_solutions.ipynb
+++ b/notebooks/Solutions/seaborn_TP_solutions.ipynb
@@ -34,12 +34,15 @@
    "id": "3778963b-3bae-486d-8db7-30f23eb239ac",
    "metadata": {},
    "source": [
-    "## Import the data and have a look on them"
+    "## Import the data and have a look at them\n",
+    "\n",
+    "1. import the pandas and seaborn modules\n",
+    "2. import the data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "minor-doctrine",
    "metadata": {},
    "outputs": [],
@@ -50,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "skilled-daniel",
    "metadata": {},
    "outputs": [],
@@ -58,189 +61,30 @@
     "ha_df = pd.read_csv(\"../data/happiness_2016.csv\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "3d7b230d-ab79-4075-b3ce-248a3dc85fbb",
+   "metadata": {},
+   "source": [
+    "3. have a look at them"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "f8729a5b-314d-42fc-b130-783ca5e2076a",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(157, 13)"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "ha_df.shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "brutal-manufacturer",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Country</th>\n",
-       "      <th>Region</th>\n",
-       "      <th>Happiness Rank</th>\n",
-       "      <th>Happiness Score</th>\n",
-       "      <th>Lower Confidence Interval</th>\n",
-       "      <th>Upper Confidence Interval</th>\n",
-       "      <th>Economy (GDP per Capita)</th>\n",
-       "      <th>Family</th>\n",
-       "      <th>Health (Life Expectancy)</th>\n",
-       "      <th>Freedom</th>\n",
-       "      <th>Trust (Government Corruption)</th>\n",
-       "      <th>Generosity</th>\n",
-       "      <th>Dystopia Residual</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Denmark</td>\n",
-       "      <td>Western Europe</td>\n",
-       "      <td>1</td>\n",
-       "      <td>7.526</td>\n",
-       "      <td>7.460</td>\n",
-       "      <td>7.592</td>\n",
-       "      <td>1.44178</td>\n",
-       "      <td>1.16374</td>\n",
-       "      <td>0.79504</td>\n",
-       "      <td>0.57941</td>\n",
-       "      <td>0.44453</td>\n",
-       "      <td>0.36171</td>\n",
-       "      <td>2.73939</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Switzerland</td>\n",
-       "      <td>Western Europe</td>\n",
-       "      <td>2</td>\n",
-       "      <td>7.509</td>\n",
-       "      <td>7.428</td>\n",
-       "      <td>7.590</td>\n",
-       "      <td>1.52733</td>\n",
-       "      <td>1.14524</td>\n",
-       "      <td>0.86303</td>\n",
-       "      <td>0.58557</td>\n",
-       "      <td>0.41203</td>\n",
-       "      <td>0.28083</td>\n",
-       "      <td>2.69463</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Iceland</td>\n",
-       "      <td>Western Europe</td>\n",
-       "      <td>3</td>\n",
-       "      <td>7.501</td>\n",
-       "      <td>7.333</td>\n",
-       "      <td>7.669</td>\n",
-       "      <td>1.42666</td>\n",
-       "      <td>1.18326</td>\n",
-       "      <td>0.86733</td>\n",
-       "      <td>0.56624</td>\n",
-       "      <td>0.14975</td>\n",
-       "      <td>0.47678</td>\n",
-       "      <td>2.83137</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Norway</td>\n",
-       "      <td>Western Europe</td>\n",
-       "      <td>4</td>\n",
-       "      <td>7.498</td>\n",
-       "      <td>7.421</td>\n",
-       "      <td>7.575</td>\n",
-       "      <td>1.57744</td>\n",
-       "      <td>1.12690</td>\n",
-       "      <td>0.79579</td>\n",
-       "      <td>0.59609</td>\n",
-       "      <td>0.35776</td>\n",
-       "      <td>0.37895</td>\n",
-       "      <td>2.66465</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Finland</td>\n",
-       "      <td>Western Europe</td>\n",
-       "      <td>5</td>\n",
-       "      <td>7.413</td>\n",
-       "      <td>7.351</td>\n",
-       "      <td>7.475</td>\n",
-       "      <td>1.40598</td>\n",
-       "      <td>1.13464</td>\n",
-       "      <td>0.81091</td>\n",
-       "      <td>0.57104</td>\n",
-       "      <td>0.41004</td>\n",
-       "      <td>0.25492</td>\n",
-       "      <td>2.82596</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "       Country          Region  Happiness Rank  Happiness Score  \\\n",
-       "0      Denmark  Western Europe               1            7.526   \n",
-       "1  Switzerland  Western Europe               2            7.509   \n",
-       "2      Iceland  Western Europe               3            7.501   \n",
-       "3       Norway  Western Europe               4            7.498   \n",
-       "4      Finland  Western Europe               5            7.413   \n",
-       "\n",
-       "   Lower Confidence Interval  Upper Confidence Interval  \\\n",
-       "0                      7.460                      7.592   \n",
-       "1                      7.428                      7.590   \n",
-       "2                      7.333                      7.669   \n",
-       "3                      7.421                      7.575   \n",
-       "4                      7.351                      7.475   \n",
-       "\n",
-       "   Economy (GDP per Capita)   Family  Health (Life Expectancy)  Freedom  \\\n",
-       "0                   1.44178  1.16374                   0.79504  0.57941   \n",
-       "1                   1.52733  1.14524                   0.86303  0.58557   \n",
-       "2                   1.42666  1.18326                   0.86733  0.56624   \n",
-       "3                   1.57744  1.12690                   0.79579  0.59609   \n",
-       "4                   1.40598  1.13464                   0.81091  0.57104   \n",
-       "\n",
-       "   Trust (Government Corruption)  Generosity  Dystopia Residual  \n",
-       "0                        0.44453     0.36171            2.73939  \n",
-       "1                        0.41203     0.28083            2.69463  \n",
-       "2                        0.14975     0.47678            2.83137  \n",
-       "3                        0.35776     0.37895            2.66465  \n",
-       "4                        0.41004     0.25492            2.82596  "
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "ha_df.head()"
    ]
@@ -397,7 +241,11 @@
    "id": "fd9789db-9bab-478a-bcec-1c8b6775cf20",
    "metadata": {},
    "source": [
-    "## Plot the `Health (Life Expectancy)` vs `Happiness Score` and color the dots according to the region  "
+    "## Plot `Health (Life Expectancy)` vs `Happiness Score`, color the dots according to the region, and specify a size for the figure (9 x 7 inches)\n",
+    "\n",
+    "1. import pyplot\n",
+    "2. then create a new figure and axis at the right size\n",
+    "3. create the plot"
    ]
   },
   {
-- 
GitLab