From 53fb043d42506fbe415b48eac7b9843d18288000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bertrand=20N=C3=A9ron?= <bneron@pasteur.fr> Date: Thu, 17 Oct 2024 15:53:48 +0200 Subject: [PATCH] create seaborn TP on happyness 2016 data set --- notebooks/Practicals/seaborn_TP.ipynb | 506 ++++++++++++++---- .../Solutions/seaborn_TP_solutions.ipynb | 198 +------ 2 files changed, 434 insertions(+), 270 deletions(-) diff --git a/notebooks/Practicals/seaborn_TP.ipynb b/notebooks/Practicals/seaborn_TP.ipynb index 47d0a4c..87eb3f2 100644 --- a/notebooks/Practicals/seaborn_TP.ipynb +++ b/notebooks/Practicals/seaborn_TP.ipynb @@ -8,13 +8,13 @@ "# <center><b>Hands-on</b></center>\n", "\n", "<div style=\"text-align:center\">\n", - " <img src=\"images/seaborn.png\" width=\"600px\">\n", + " <img src=\"../images/seaborn.png\" width=\"600px\">\n", " <div>\n", - " Bertrand Néron, François Laurent, Etienne Kornobis\n", + " Bertrand Néron, François Laurent, Etienne Kornobis, Vincent Guillemot\n", " <br />\n", " <a src=\" https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistiqucs HUB</a>\n", " <br />\n", - " © Institut Pasteur, 2021\n", + " © Institut Pasteur, 2024\n", " </div> \n", "</div>" ] @@ -24,379 +24,695 @@ "id": "compliant-basis", "metadata": {}, "source": [ - "Practice your graphing skills using data from milieu intérieur in `data/mi.csv`:" + "Practice your graphing skills through the data of [happiness 2016](https://www.kaggle.com/datasets/unsdsn/world-happiness?select=2016.csv)\n", + "\n", + "(The data are already in data directory as `happiness_2016.csv`)" ] }, { "cell_type": "markdown", - "id": "departmental-exhibition", + "id": "3778963b-3bae-486d-8db7-30f23eb239ac", "metadata": {}, "source": [ - "- Do a boxplot showing the differences in temperature between females and males:" + "## Import the data and have a look on them\n", + "\n", + "1. import the pandas and seaborn modules\n", + "2. 
import the data" ] }, { "cell_type": "code", - "execution_count": null, - "id": "98e904b6-6e90-4c74-a463-2339d3961250", + "execution_count": 2, + "id": "minor-doctrine", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "skilled-daniel", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "portuguese-worse", + "id": "1360a875-cce6-4a2f-b19f-faf1b64230cc", "metadata": {}, "source": [ - "- Using a histogram and continuous probability density curve, display the distribution of age in the dataset" + "3. have a look on them" ] }, { "cell_type": "code", - "execution_count": null, - "id": "55756807-e1fb-4fb5-878c-5e46acea7a11", - "metadata": {}, - "outputs": [], + "execution_count": 4, + "id": "f8729a5b-314d-42fc-b130-783ca5e2076a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(157, 13)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "brutal-manufacturer", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Country</th>\n", + " <th>Region</th>\n", + " <th>Happiness Rank</th>\n", + " <th>Happiness Score</th>\n", + " <th>Lower Confidence Interval</th>\n", + " <th>Upper Confidence Interval</th>\n", + " <th>Economy (GDP per Capita)</th>\n", + " <th>Family</th>\n", + " <th>Health (Life Expectancy)</th>\n", + " <th>Freedom</th>\n", + " <th>Trust (Government Corruption)</th>\n", + " <th>Generosity</th>\n", 
+ " <th>Dystopia Residual</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Denmark</td>\n", + " <td>Western Europe</td>\n", + " <td>1</td>\n", + " <td>7.526</td>\n", + " <td>7.460</td>\n", + " <td>7.592</td>\n", + " <td>1.44178</td>\n", + " <td>1.16374</td>\n", + " <td>0.79504</td>\n", + " <td>0.57941</td>\n", + " <td>0.44453</td>\n", + " <td>0.36171</td>\n", + " <td>2.73939</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Switzerland</td>\n", + " <td>Western Europe</td>\n", + " <td>2</td>\n", + " <td>7.509</td>\n", + " <td>7.428</td>\n", + " <td>7.590</td>\n", + " <td>1.52733</td>\n", + " <td>1.14524</td>\n", + " <td>0.86303</td>\n", + " <td>0.58557</td>\n", + " <td>0.41203</td>\n", + " <td>0.28083</td>\n", + " <td>2.69463</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Iceland</td>\n", + " <td>Western Europe</td>\n", + " <td>3</td>\n", + " <td>7.501</td>\n", + " <td>7.333</td>\n", + " <td>7.669</td>\n", + " <td>1.42666</td>\n", + " <td>1.18326</td>\n", + " <td>0.86733</td>\n", + " <td>0.56624</td>\n", + " <td>0.14975</td>\n", + " <td>0.47678</td>\n", + " <td>2.83137</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Norway</td>\n", + " <td>Western Europe</td>\n", + " <td>4</td>\n", + " <td>7.498</td>\n", + " <td>7.421</td>\n", + " <td>7.575</td>\n", + " <td>1.57744</td>\n", + " <td>1.12690</td>\n", + " <td>0.79579</td>\n", + " <td>0.59609</td>\n", + " <td>0.35776</td>\n", + " <td>0.37895</td>\n", + " <td>2.66465</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Finland</td>\n", + " <td>Western Europe</td>\n", + " <td>5</td>\n", + " <td>7.413</td>\n", + " <td>7.351</td>\n", + " <td>7.475</td>\n", + " <td>1.40598</td>\n", + " <td>1.13464</td>\n", + " <td>0.81091</td>\n", + " <td>0.57104</td>\n", + " <td>0.41004</td>\n", + " <td>0.25492</td>\n", + " <td>2.82596</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Country 
Region Happiness Rank Happiness Score \\\n", + "0 Denmark Western Europe 1 7.526 \n", + "1 Switzerland Western Europe 2 7.509 \n", + "2 Iceland Western Europe 3 7.501 \n", + "3 Norway Western Europe 4 7.498 \n", + "4 Finland Western Europe 5 7.413 \n", + "\n", + " Lower Confidence Interval Upper Confidence Interval \\\n", + "0 7.460 7.592 \n", + "1 7.428 7.590 \n", + "2 7.333 7.669 \n", + "3 7.421 7.575 \n", + "4 7.351 7.475 \n", + "\n", + " Economy (GDP per Capita) Family Health (Life Expectancy) Freedom \\\n", + "0 1.44178 1.16374 0.79504 0.57941 \n", + "1 1.52733 1.14524 0.86303 0.58557 \n", + "2 1.42666 1.18326 0.86733 0.56624 \n", + "3 1.57744 1.12690 0.79579 0.59609 \n", + "4 1.40598 1.13464 0.81091 0.57104 \n", + "\n", + " Trust (Government Corruption) Generosity Dystopia Residual \n", + "0 0.44453 0.36171 2.73939 \n", + "1 0.41203 0.28083 2.69463 \n", + "2 0.14975 0.47678 2.83137 \n", + "3 0.35776 0.37895 2.66465 \n", + "4 0.41004 0.25492 2.82596 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [] }, { "cell_type": "markdown", - "id": "prepared-stephen", + "id": "departmental-exhibition", "metadata": {}, "source": [ - "- Use a barplot to show the count of vaccinated for yellow fever (see the documentation for a countplot)" + "## Do a boxplot showing the differences in `Happiness Score` between `Region`:" ] }, { "cell_type": "code", "execution_count": null, - "id": "1425046c-a058-45fe-95b5-5eca6ebbd33a", + "id": "saved-identity", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "immediate-method", + "id": "portuguese-worse", "metadata": {}, "source": [ - "- Plot the distribution of age for the people vaccinated for the flu" + "## Using a histogram and a continuous probability density curve, display the distribution of `Freedom` in the dataset" ] }, { "cell_type": "code", "execution_count": null, - "id": "d567194c-3698-44c9-b5f8-b8a3d3493b0c", + "id": "continuous-indian", 
"metadata": {}, "outputs": [], "source": [] }, { - "cell_type": "markdown", - "id": "temporal-synthesis", + "cell_type": "code", + "execution_count": null, + "id": "understanding-vegetarian", "metadata": {}, - "source": [ - "- Feel free to explore more of [seaborn](https://seaborn.pydata.org/examples/index.html) !" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", - "id": "db56d49a-4770-4f9e-af6b-78960574d338", + "id": "prepared-stephen", "metadata": {}, "source": [ - "# Exploring count matrices from RNA-seq data" + "- Use a barplot to show the number of countries per Region (see the documentation for a countplot)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "worldwide-communication", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", - "id": "5377668b-dea5-4c20-8249-5266f98774eb", + "id": "4e1d16f2-57d8-4f0e-9a69-e9ab193a3ebc", "metadata": {}, "source": [ - "<img src=\"images/rnaseq.png\" style=\"margin:0 auto;width:800px\">" + "As you can see, the labels overlap each other and are not readable.\n", + "\n", + "One possibility is to rotate the x-axis labels. In this case, it is better to provide the labels explicitly." 
] }, { - "cell_type": "markdown", - "id": "ebf1606b-0b21-4821-a899-551ec33c977e", + "cell_type": "code", + "execution_count": null, + "id": "64ee74bb-5f37-485f-8c03-d06ac14d3010", "metadata": {}, + "outputs": [], "source": [ - "- Import the count_matrix tsv file from the data folder" + "# extract the Region from the data, We will use them as labels for figures below" ] }, { "cell_type": "code", "execution_count": null, - "id": "eb53a1f5-9ea7-491e-bcfa-820cb1663af5", + "id": "eb7c96ac-585a-4787-a2ee-dec3d04790ca", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "c80d9947-9ccf-4499-a1c2-9194377cd054", + "id": "e2eac27c-2de9-4942-82b9-294318ec5fd4", "metadata": {}, "source": [ - "- Simplify the dataframe to only have the \"Geneid\", \"WTx\" and \"Cx\" columns" + "## On the same data `Happiness` and `Region` do a boxplot and a swarmplot to display the structure of the data" ] }, { "cell_type": "code", "execution_count": null, - "id": "56e90032-75ce-47b5-9cd3-95219cd7b26e", + "id": "b0fd6058-65b0-4f9b-bc59-dce0193f1580", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fe079b4-013a-4d48-ac31-9daad4b4673e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "eb65b51f-f689-4a66-b47c-e79f0e9eba52", + "id": "immediate-method", "metadata": {}, "source": [ - "- Format properly your DataFrame to be able to use https://seaborn.pydata.org/generated/seaborn.clustermap.html to realize a heatmap." 
+ "## Plot the distribution of `Happiness Score` for the countries of the `Western Europe` region" ] }, { "cell_type": "code", "execution_count": null, - "id": "9b422fcb-7cc1-4766-92e3-276742381ae6", + "id": "academic-measure", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "f8d6188e-3a37-4ba5-b377-a11696054e9c", + "id": "fd9789db-9bab-478a-bcec-1c8b6775cf20", "metadata": {}, "source": [ - "- Explore the clustermap documentation to have a more visual heatmap by standardizing the data within genes." + "## Plot the `Health (Life Expectancy)` vs `Happiness Score` and color the dots according to the region. Specify a size for the figure (9 x 7 inches) \n", + "\n", + "1. import pyplot\n", + "2. then create a new figure and axis at the right size\n", + "3. create the plot" ] }, { "cell_type": "code", "execution_count": null, - "id": "06be3f98-2167-44ac-9318-955286d77903", + "id": "d42f72f1-1d01-496e-89a1-68391ffa4281", "metadata": {}, "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "id": "52ae8376-3c66-4ca6-86a9-f9ae9f56076f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "temporal-synthesis", + "metadata": {}, + "source": [ + "- Feel free to explore more of [seaborn](https://seaborn.pydata.org/examples/index.html) !" + ] + }, { "cell_type": "markdown", - "id": "2e61a207-223a-4c01-88ea-76b1b8c3a0b9", + "id": "3063abf7-2251-48eb-b371-6c5b70b45fe7", "metadata": {}, "source": [ - "- Reformat the counts_df dataframe to have genes in columns and samples in rows.\n", - "- Add a \"group\" column defining the grouping of the samples:\n", - " - \"WTx\" samples will be from the \"WT\" group.\n", - " - \"Cx\" samples will be from the \"C\" group." 
+ "## Do a barplot of the Happiness Score for each Region" ] }, { "cell_type": "code", "execution_count": null, - "id": "eea3f521-6960-44ab-ac0b-fcf5a002237f", + "id": "85dd0df6-74e7-43be-9a7c-eb922a06601b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "9a88ecb1-9ed3-4160-91ee-24a30e994b71", + "id": "3ee5741a-64f4-4690-963b-1f7e729398bf", "metadata": {}, "source": [ - "- Display a barplot showing the mean expression for each group for a particular gene (for example \"gene-LEPBI_RS00065\")." + "## from this point we will focus on the Regions\n", + "\n", + "### clean our dataset. Remove not relevant columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "cf74e85e-eef3-4023-bb88-5a864cf3c3f9", + "id": "0344b730-1535-47fb-82f5-07003fd223f9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "99e2455a-cb7d-44d5-a4a0-2cf272c814ab", + "id": "36e449d1-0add-4ebc-8903-d535219ce423", "metadata": {}, "source": [ - "- Try plotting a swarmplot on top of the previous barplot:" + "1. keep only columns 'Region', 'Happiness Score', 'Economy (GDP per Capita)', 'Family', 'Freedom','Trust (Government Corruption)', 'Generosity'\n", + "2. set the index to the Region\n", + "3. have a look on your new data" ] }, { "cell_type": "code", "execution_count": null, - "id": "7cf225f9-aea7-4cd9-ac90-a99592799527", + "id": "bdf897c4-b8f3-4dff-b9c0-0ad47b25ecc0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "d200d375-362e-4c1d-a88e-130b094e6feb", + "id": "e1ae03ac-ac7c-436d-987d-113e9cca3eec", "metadata": {}, "source": [ - "- Now plot the same data using a boxplot. Can you see the problem of displaying boxplots for this kind of data ?" + "## Aggregate the new data region by region. 
Compute the mean of each country as value for the corresponding Region" ] }, { "cell_type": "code", "execution_count": null, - "id": "e4daf00e-9a2c-4ec4-9d26-aa18aae5d82d", + "id": "3fc3ea89-a448-4e7b-abfb-3fa92cffc5f7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "2e1cabe0-aab7-4f0e-888e-81aae7d5df8d", + "id": "97cb188c-3e50-4492-961f-cadea3611aaa", "metadata": {}, "source": [ - "- Compute the median of each genes by groups:" + "## Do a hierarchically-clustered heatmap " ] }, { "cell_type": "code", "execution_count": null, - "id": "6ffd0f59-0fd7-41b9-a87a-c6e1a74145e8", + "id": "9aa21ed4-e9b2-4eb3-a693-c59ceb513552", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "308cc10b-6727-4bc5-b05d-4777037e252e", + "id": "88d27d29-e3b8-43d7-8324-25e50c247872", "metadata": {}, "source": [ - "We are going now to add extra annotations to this median table in order to identify genes of interest.\n", - "- Import the annotation.csv table from the data folder: " + "Check the data." ] }, { "cell_type": "code", "execution_count": null, - "id": "9be6ee5b-d497-47fa-8ac5-cf5514fd52c0", + "id": "0128f575-0b2a-4cbc-8f6e-8b7e22d81254", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "50fa81a7-3f34-4160-ad2d-f77d21be9ac0", + "id": "f9b39ab8-0051-4840-9e87-fe2bcb8ca07a", "metadata": {}, "source": [ - "Annotations in this table are available for many types of loci (the \"genetic_type\" column), but here we will focus on the \"gene\" genetic_type. \n", - "- Filter the annotation dataframe to have only \"gene\" as \"genetic_type\"." 
+ "The data are not in the same range, so it could be better to standardize the data before clustering" ] }, { "cell_type": "code", "execution_count": null, - "id": "f9a8bcf7-0bcc-43e8-828a-ec204658e528", + "id": "ff4beb57-b357-47a3-b7bd-877e05229b6b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e19f9472-cb9b-434b-8689-2bf09d49b902", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "f8a4e744-e7e2-43b6-b3d4-e59feb40d3ff", + "id": "d64a0377-339b-4fe7-beb4-a32e4a4e0113", "metadata": {}, "source": [ - "- Concatenate the dataframe with median by group and the annotation dataframe together:" + "It's possible to do that directly in seaborn with the `z_score` option (https://seaborn.pydata.org/generated/seaborn.clustermap.html)" ] }, { "cell_type": "code", "execution_count": null, - "id": "afd8467a-33e1-4b9e-8f6d-b2229099c874", + "id": "3b439517-5007-4fbb-828d-265f9835594f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "af9f8e1f-5f8b-4152-b08a-44e957f13cec", + "id": "f8d6188e-3a37-4ba5-b377-a11696054e9c", + "metadata": {}, + "source": [ + "- Explore the clustermap documentation to have a more visual heatmap by standardizing the data within each variable (column)." + ] + }, + { + "cell_type": "markdown", + "id": "a2627322-e6a5-422f-8a69-b89dbd4b777e", + "metadata": {}, + "source": [ + "## Create a function which produces a single image with four different plots of your choice and save it to a PDF file.\n", + "\n", + "like the image below." + ] + }, + { + "cell_type": "markdown", + "id": "4121ff3d-6814-493e-a505-357ad81b0d28", "metadata": {}, "source": [ - "- Calculate an estimate of the gene expression fold change for each gene (by dividing the C median expressions by WT median expressions).\n", - "- Add it as a \"FoldChange\" column to the previous dataframe." 
+ "<img src=\"../images/multiple_figure.png\" width=\"50%\" />" ] }, { "cell_type": "code", "execution_count": null, - "id": "bb617d00-2c2d-45cc-ace0-3656dc999b17", + "id": "a322c866-9232-4fae-bcee-9a635e3fd70b", "metadata": {}, "outputs": [], "source": [] }, { - "cell_type": "markdown", - "id": "d70eb26b-0a26-4bbc-af03-ba8781b09fb5", + "cell_type": "code", + "execution_count": null, + "id": "044022d1-741d-4a07-ba7f-c1f863cca138", "metadata": {}, + "outputs": [], "source": [ - "- Use a barplot to display fold changes and using the new gene annotation (The \"Name\" column)" + "def expression_graph():\n", + " ...\n", + " return fig\n", + " " ] }, { "cell_type": "code", "execution_count": null, - "id": "4dd4cbee-547f-43f1-9ed7-173f3040b8d5", + "id": "c33bfc78-7480-4327-93a0-f8aaca0d3614", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "my_fig = expression_graph()\n", + "..." + ] + }, + { + "cell_type": "markdown", + "id": "0d05aba4-3c85-4cd9-85f3-5296b19308fb", + "metadata": {}, + "source": [ + "# Extras" + ] }, { "cell_type": "markdown", - "id": "34a26492-7c6b-4a07-a4de-67ec8f693cdc", + "id": "66d6668e-683f-462e-a72f-28bdda8736f2", "metadata": {}, "source": [ - "- By calculating the length of each gene and using a visualisation, does gene expression appears correlated with gene length ?" 
+ "- Using ipywidgets, make a function to display a barplot of `Happiness Score` by country but with the region selected by the user (using a Dropdown widget)" + ] + }, + { + "cell_type": "markdown", + "id": "042bd87e-d2dc-4544-a771-51d80c565d0f", + "metadata": {}, + "source": [ + "Import the needed modules \n", + "- `widgets` and `interact` from the `ipywidgets` package\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "6f35b696-0807-4df4-9310-cb9197e7bf85", + "id": "64ebeca1-1332-4585-9e5c-c1b66f82be71", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from ipywidgets import widgets\n", + "from ipywidgets import interact" + ] }, { "cell_type": "markdown", - "id": "a2627322-e6a5-422f-8a69-b89dbd4b777e", + "id": "277264e6-a173-40c5-b71e-4cd551a7fa99", "metadata": {}, "source": [ - "- Create a function which produce a single image with four different plots of your choice and save it to pdf file." + "Create a DataFrame containing the regions (without duplicate values)" ] }, { "cell_type": "code", "execution_count": null, - "id": "70e001a1-2848-4fb7-9f33-7beb4475e0fc", + "id": "ebf7fde9-b4a1-4e8a-86ab-86ad8b1b533a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "0d05aba4-3c85-4cd9-85f3-5296b19308fb", + "id": "f34e5053-ccf5-4a67-96db-7457fe16bbd6", "metadata": {}, "source": [ - "# Extras" + "1. Use this DataFrame to populate your dropdown list\n", + "2. Use the region selected in the dropdown list as the parameter of your function\n", + "3. Select from the whole data frame the data corresponding to this region\n", + "4. 
display the barplot" ] }, { "cell_type": "markdown", - "id": "66d6668e-683f-462e-a72f-28bdda8736f2", + "id": "feba608f-2ecb-41ae-b04a-12f075fd644b", + "metadata": {}, + "source": [ + "Below is the code skeleton of your function\n", + "\n", + "```python\n", + "@interact(region=widgets.Dropdown(options=regions))\n", + "def plot_counts(region):\n", + " data = ha_df.loc[ha_df['Region'] == region]\n", + " ax = sns.barplot(data= ....\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb746fda-36cc-4c35-92d8-257a489fb278", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "3f4bd68e-eb26-46f8-a00f-86f9d0570580", "metadata": {}, "source": [ - "- Using ipywidget, make a function to display barplot of gene expression by groups with the gene being selected by the user (using a Dropdown widget for example)." + "You can customize your figure like any classical seaborn/matplotlib figure\n", + "\n", + "For instance, display the value above each bar" ] }, { "cell_type": "code", "execution_count": null, - "id": "e587f202-7ca4-43fb-ac3c-015c740c69d2", + "id": "7bcee7c5-f1c2-4035-9b7c-e68e1d73a932", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d78b7b86-ecaa-4d27-80ca-2d3e46c2aca3", "metadata": {}, "outputs": [], "source": [] @@ -404,9 +720,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:dev]", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "conda-env-dev-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -418,7 +734,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/Solutions/seaborn_TP_solutions.ipynb b/notebooks/Solutions/seaborn_TP_solutions.ipynb index d96cdf7..7a9e21b 100644 --- a/notebooks/Solutions/seaborn_TP_solutions.ipynb +++ 
b/notebooks/Solutions/seaborn_TP_solutions.ipynb @@ -34,12 +34,15 @@ "id": "3778963b-3bae-486d-8db7-30f23eb239ac", "metadata": {}, "source": [ - "## Import the data and have a look on them" + "## Import the data and have a look on them\n", + "\n", + "1. import the pandas and seaborn modules\n", + "2. import the data" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "minor-doctrine", "metadata": {}, "outputs": [], @@ -50,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "skilled-daniel", "metadata": {}, "outputs": [], @@ -58,189 +61,30 @@ "ha_df = pd.read_csv(\"../data/happiness_2016.csv\")" ] }, + { + "cell_type": "markdown", + "id": "3d7b230d-ab79-4075-b3ce-248a3dc85fbb", + "metadata": {}, + "source": [ + "3. have a look on them" + ] + }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f8729a5b-314d-42fc-b130-783ca5e2076a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(157, 13)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ha_df.shape" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "brutal-manufacturer", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Country</th>\n", - " <th>Region</th>\n", - " <th>Happiness Rank</th>\n", - " <th>Happiness Score</th>\n", - " <th>Lower Confidence Interval</th>\n", - " <th>Upper Confidence Interval</th>\n", - " <th>Economy (GDP per Capita)</th>\n", - 
" <th>Family</th>\n", - " <th>Health (Life Expectancy)</th>\n", - " <th>Freedom</th>\n", - " <th>Trust (Government Corruption)</th>\n", - " <th>Generosity</th>\n", - " <th>Dystopia Residual</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>Denmark</td>\n", - " <td>Western Europe</td>\n", - " <td>1</td>\n", - " <td>7.526</td>\n", - " <td>7.460</td>\n", - " <td>7.592</td>\n", - " <td>1.44178</td>\n", - " <td>1.16374</td>\n", - " <td>0.79504</td>\n", - " <td>0.57941</td>\n", - " <td>0.44453</td>\n", - " <td>0.36171</td>\n", - " <td>2.73939</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>Switzerland</td>\n", - " <td>Western Europe</td>\n", - " <td>2</td>\n", - " <td>7.509</td>\n", - " <td>7.428</td>\n", - " <td>7.590</td>\n", - " <td>1.52733</td>\n", - " <td>1.14524</td>\n", - " <td>0.86303</td>\n", - " <td>0.58557</td>\n", - " <td>0.41203</td>\n", - " <td>0.28083</td>\n", - " <td>2.69463</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>Iceland</td>\n", - " <td>Western Europe</td>\n", - " <td>3</td>\n", - " <td>7.501</td>\n", - " <td>7.333</td>\n", - " <td>7.669</td>\n", - " <td>1.42666</td>\n", - " <td>1.18326</td>\n", - " <td>0.86733</td>\n", - " <td>0.56624</td>\n", - " <td>0.14975</td>\n", - " <td>0.47678</td>\n", - " <td>2.83137</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>Norway</td>\n", - " <td>Western Europe</td>\n", - " <td>4</td>\n", - " <td>7.498</td>\n", - " <td>7.421</td>\n", - " <td>7.575</td>\n", - " <td>1.57744</td>\n", - " <td>1.12690</td>\n", - " <td>0.79579</td>\n", - " <td>0.59609</td>\n", - " <td>0.35776</td>\n", - " <td>0.37895</td>\n", - " <td>2.66465</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>Finland</td>\n", - " <td>Western Europe</td>\n", - " <td>5</td>\n", - " <td>7.413</td>\n", - " <td>7.351</td>\n", - " <td>7.475</td>\n", - " <td>1.40598</td>\n", - " <td>1.13464</td>\n", - " <td>0.81091</td>\n", - " <td>0.57104</td>\n", - " 
<td>0.41004</td>\n", - " <td>0.25492</td>\n", - " <td>2.82596</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " Country Region Happiness Rank Happiness Score \\\n", - "0 Denmark Western Europe 1 7.526 \n", - "1 Switzerland Western Europe 2 7.509 \n", - "2 Iceland Western Europe 3 7.501 \n", - "3 Norway Western Europe 4 7.498 \n", - "4 Finland Western Europe 5 7.413 \n", - "\n", - " Lower Confidence Interval Upper Confidence Interval \\\n", - "0 7.460 7.592 \n", - "1 7.428 7.590 \n", - "2 7.333 7.669 \n", - "3 7.421 7.575 \n", - "4 7.351 7.475 \n", - "\n", - " Economy (GDP per Capita) Family Health (Life Expectancy) Freedom \\\n", - "0 1.44178 1.16374 0.79504 0.57941 \n", - "1 1.52733 1.14524 0.86303 0.58557 \n", - "2 1.42666 1.18326 0.86733 0.56624 \n", - "3 1.57744 1.12690 0.79579 0.59609 \n", - "4 1.40598 1.13464 0.81091 0.57104 \n", - "\n", - " Trust (Government Corruption) Generosity Dystopia Residual \n", - "0 0.44453 0.36171 2.73939 \n", - "1 0.41203 0.28083 2.69463 \n", - "2 0.14975 0.47678 2.83137 \n", - "3 0.35776 0.37895 2.66465 \n", - "4 0.41004 0.25492 2.82596 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ha_df.head()" ] @@ -397,7 +241,11 @@ "id": "fd9789db-9bab-478a-bcec-1c8b6775cf20", "metadata": {}, "source": [ - "## Plot the `Health (Life Expectancy)` vs `Happiness Score` and color the dots according to the region " + "## Plot the `Health (Life Expectancy)` vs `Happiness Score` and color the dots according to the region specify a size for the figure (9 inches x 7) \n", + "\n", + "1. import pyplot\n", + "2. then create a new figure and axis at the right size\n", + "3. create the plot" ] }, { -- GitLab