From 7b4a7f7b518032641fbd0a9f8591f68699aabbb9 Mon Sep 17 00:00:00 2001 From: Etienne Kornobis <ekornobis@gmail.com> Date: Sun, 26 Sep 2021 22:07:07 +0200 Subject: [PATCH] Adding pandas course and practices --- data/blast.txt | 176 + notebooks/images/pandas_dataframe.png | Bin 0 -> 34313 bytes notebooks/images/pandas_logo.svg | 1 + notebooks/jupyter_cours.ipynb | 125 +- notebooks/jupyter_practice.ipynb | 64 +- notebooks/pandas_TP.ipynb | 481 ++ notebooks/pandas_TP_solution.ipynb | 2933 ++++++++++ notebooks/pandas_cours.ipynb | 7757 +++++++++++++++++++++++++ 8 files changed, 11456 insertions(+), 81 deletions(-) create mode 100644 data/blast.txt create mode 100644 notebooks/images/pandas_dataframe.png create mode 100644 notebooks/images/pandas_logo.svg create mode 100644 notebooks/pandas_TP.ipynb create mode 100644 notebooks/pandas_TP_solution.ipynb create mode 100644 notebooks/pandas_cours.ipynb diff --git a/data/blast.txt b/data/blast.txt new file mode 100644 index 0000000..2a19a67 --- /dev/null +++ b/data/blast.txt @@ -0,0 +1,176 @@ +AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 1 316 1 316 0.0 654 +AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 23 316 51 344 0.0 559 +AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 1 316 1 316 0.0 537 +AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 1 316 1 316 0.0 527 +AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 1 316 1 316 0.0 515 +AK1BA_HUMAN sp|Q5RJP0|ALD1_RAT 78.16 316 69 0 1 316 1 316 2e-177 501 +AK1BA_HUMAN sp|P15122|ALDR_RABIT 72.15 316 88 0 1 316 1 316 1e-162 462 +AK1BA_HUMAN sp|P07943|ALDR_RAT 71.11 315 91 0 1 315 1 315 3e-161 459 +AK1BA_HUMAN sp|P15121|ALDR_HUMAN 70.57 316 93 0 1 316 1 316 1e-160 458 +AK1BA_HUMAN sp|P45376|ALDR_MOUSE 70.48 315 93 0 1 315 1 315 2e-160 457 +AK1BA_HUMAN sp|P16116|ALDR_BOVIN 72.12 312 87 0 5 316 4 315 4e-159 454 +AK1BA_HUMAN sp|P80276|ALDR_PIG 71.52 316 90 0 1 316 1 316 7e-158 451 +AK1BA_HUMAN sp|P82125|AKCL2_PIG 60.00 305 116 1 12 316 3 301 7e-131 382 +AK1BA_HUMAN 
sp|Q4R802|AKCL2_MACFA 54.46 325 123 2 11 316 2 320 2e-119 353 +AK1BA_HUMAN sp|Q96JD6|AKCL2_HUMAN 54.46 325 123 3 11 316 2 320 2e-117 348 +AK1BA_HUMAN sp|Q9DCT1|AKCL2_MOUSE 56.91 304 125 1 13 316 4 301 4e-117 347 +AK1BA_HUMAN sp|Q6AZW2|A1A1A_DANRE 56.04 298 128 2 1 297 1 296 1e-116 346 +AK1BA_HUMAN sp|Q5U1Y4|AKCL2_RAT 56.39 305 127 1 12 316 3 301 3e-116 344 +AK1BA_HUMAN sp|Q8VCX1|AK1D1_MOUSE 51.90 316 148 2 5 316 10 325 5e-111 332 +AK1BA_HUMAN sp|P51857|AK1D1_HUMAN 50.79 317 151 2 5 316 10 326 8e-111 331 +AK1BA_HUMAN sp|Q9TV64|AK1D1_RABIT 50.79 317 151 2 5 316 10 326 3e-110 330 +AK1BA_HUMAN sp|Q9JII6|AK1A1_MOUSE 50.15 325 150 3 2 316 3 325 1e-108 326 +AK1BA_HUMAN sp|P31210|AK1D1_RAT 51.74 317 148 3 5 316 10 326 1e-108 326 +AK1BA_HUMAN sp|P51635|AK1A1_RAT 50.15 325 150 3 2 316 3 325 1e-108 325 +AK1BA_HUMAN sp|Q5R5D5|AK1A1_PONAB 48.92 325 154 3 2 316 3 325 3e-106 320 +AK1BA_HUMAN sp|P14550|AK1A1_HUMAN 48.92 325 154 3 2 316 3 325 4e-106 319 +AK1BA_HUMAN sp|P80508|PE2R_RABIT 50.63 316 152 2 5 316 8 323 6e-106 319 +AK1BA_HUMAN sp|P50578|AK1A1_PIG 49.54 325 152 3 2 316 3 325 3e-105 317 +AK1BA_HUMAN sp|Q5ZK84|AK1A1_CHICK 52.01 323 143 3 4 316 7 327 6e-105 317 +AK1BA_HUMAN sp|Q6GMC7|AK1A1_XENLA 51.37 329 145 4 1 316 1 327 3e-104 315 +AK1BA_HUMAN sp|Q28FD1|AK1A1_XENTR 51.37 329 145 4 1 316 1 327 3e-103 312 +AK1BA_HUMAN sp|P52895|AK1C2_HUMAN 48.73 316 158 2 5 316 8 323 9e-103 311 +AK1BA_HUMAN sp|Q3ZCJ2|AK1A1_BOVIN 48.31 325 156 3 2 316 3 325 1e-102 310 +AK1BA_HUMAN sp|P52898|DDBX_BOVIN 49.05 316 157 2 5 316 8 323 5e-102 309 +AK1BA_HUMAN sp|Q5REQ0|AK1C1_PONAB 48.10 316 160 2 5 316 8 323 6e-102 308 +AK1BA_HUMAN sp|P17516|AK1C4_HUMAN 48.10 316 160 2 5 316 8 323 1e-101 308 +AK1BA_HUMAN sp|Q04828|AK1C1_HUMAN 48.10 316 160 2 5 316 8 323 1e-101 308 +AK1BA_HUMAN sp|Q5R7C9|AK1C3_PONAB 48.42 316 159 2 5 316 8 323 1e-101 308 +AK1BA_HUMAN sp|Q1XAA8|AK1CN_HORSE 47.94 315 161 1 5 316 8 322 1e-100 305 +AK1BA_HUMAN sp|Q6W8P9|AK1CO_HORSE 48.70 308 154 2 13 316 16 323 1e-100 305 +AK1BA_HUMAN 
sp|P70694|DHB5_MOUSE 48.10 316 160 2 5 316 8 323 3e-100 304 +AK1BA_HUMAN sp|Q95JH5|AK1C4_MACFA 47.47 316 162 2 5 316 8 323 3e-100 304 +AK1BA_HUMAN sp|P52897|PGFS2_BOVIN 48.38 308 155 2 13 316 16 323 4e-100 304 +AK1BA_HUMAN sp|Q95JH4|AK1C4_MACFU 47.15 316 163 2 5 316 8 323 8e-100 303 +AK1BA_HUMAN sp|P05980|PGFS1_BOVIN 47.47 316 162 2 5 316 8 323 9e-100 303 +AK1BA_HUMAN sp|P42330|AK1C3_HUMAN 47.47 316 162 2 5 316 8 323 9e-100 303 +AK1BA_HUMAN sp|Q95JH6|AK1C1_MACFU 47.78 316 161 2 5 316 8 323 1e-99 303 +AK1BA_HUMAN sp|Q568L5|A1A1B_DANRE 49.08 326 154 3 1 316 1 324 2e-99 302 +AK1BA_HUMAN sp|Q95JH7|AK1C1_MACFA 47.47 316 162 2 5 316 8 323 3e-99 301 +AK1BA_HUMAN sp|P17264|CRO_LITCT 47.17 318 164 2 3 316 7 324 2e-98 300 +AK1BA_HUMAN sp|P02532|CRO_RANTE 46.54 318 166 2 3 316 7 324 2e-98 299 +AK1BA_HUMAN sp|Q8VC28|AK1CD_MOUSE 47.34 319 158 4 5 316 8 323 2e-97 297 +AK1BA_HUMAN sp|P51652|AKC1H_RAT 44.65 318 168 4 5 316 8 323 3e-96 294 +AK1BA_HUMAN sp|P23457|DIDH_RAT 46.03 315 166 2 5 315 8 322 1e-94 290 +AK1BA_HUMAN sp|Q8K023|AKC1H_MOUSE 44.62 316 171 2 5 316 8 323 4e-94 288 +AK1BA_HUMAN sp|Q91WR5|AK1CL_MOUSE 44.48 308 167 2 13 316 16 323 4e-90 278 +AK1BA_HUMAN sp|P82809|AK1CD_MESAU 43.32 307 170 2 13 315 16 322 2e-85 266 +AK1BA_HUMAN sp|Q6AYQ2|AK1CL_RAT 43.71 318 166 5 5 316 8 318 2e-84 263 +AK1BA_HUMAN sp|Q54NZ7|ALRB_DICDI 47.10 293 139 5 13 299 17 299 1e-82 259 +AK1BA_HUMAN sp|Q6IMN8|ALRA_DICDI 44.11 297 148 5 6 300 6 286 5e-79 249 +AK1BA_HUMAN sp|O70473|AK1A1_CRIGR 51.74 230 108 2 15 243 1 228 3e-78 244 +AK1BA_HUMAN sp|Q0PGJ6|AKRC9_ARATH 44.33 291 140 4 3 287 6 280 5e-75 239 +AK1BA_HUMAN sp|P49378|XYL1_KLULA 42.68 314 159 7 1 297 4 313 2e-70 228 +AK1BA_HUMAN sp|Q55FL3|ALRC_DICDI 41.67 300 159 4 6 299 18 307 9e-70 226 +AK1BA_HUMAN sp|H9JTG9|AK2E4_BOMMO 39.56 316 169 5 2 311 5 304 2e-69 224 +AK1BA_HUMAN sp|Q84TF0|AKRCA_ARATH 41.03 290 149 4 4 287 7 280 5e-69 224 +AK1BA_HUMAN sp|P27800|ALDX_SPOSA 43.79 306 156 6 1 301 1 295 2e-68 222 +AK1BA_HUMAN sp|Q6Y0Z3|XYL1_CANPA 40.81 
321 156 7 5 301 10 320 3e-68 222 +AK1BA_HUMAN sp|O80944|AKRC8_ARATH 41.91 303 161 6 4 306 7 294 7e-68 221 +AK1BA_HUMAN sp|P22045|PGFS_LEIMA 42.91 296 140 6 3 297 7 274 9e-68 219 +AK1BA_HUMAN sp|Q5BGA7|XYL1_EMENI 42.38 302 163 5 5 297 6 305 2e-67 219 +AK1BA_HUMAN sp|P14065|GCY1_YEAST 41.89 296 150 6 4 291 11 292 6e-67 218 +AK1BA_HUMAN sp|Q10494|YDG7_SCHPO 43.06 288 153 5 7 292 18 296 3e-66 217 +AK1BA_HUMAN sp|Q9GV41|PGFS_TRYBB 41.84 294 134 5 5 297 7 264 3e-66 215 +AK1BA_HUMAN sp|O13283|XYL1_CANTR 40.75 319 159 7 5 301 10 320 4e-66 216 +AK1BA_HUMAN sp|P87039|XYL2_CANTR 40.75 319 159 7 5 301 10 320 5e-66 216 +AK1BA_HUMAN sp|Q4DJ07|PGFS_TRYCC 40.20 296 139 7 5 297 8 268 1e-65 214 +AK1BA_HUMAN sp|O94735|XYL1_PICGU 40.89 313 157 7 5 296 3 308 5e-65 213 +AK1BA_HUMAN sp|P38715|GRE3_YEAST 40.38 317 163 7 1 297 1 311 2e-64 212 +AK1BA_HUMAN sp|A1D4E3|XYL1_NEOFI 40.62 320 171 6 5 311 6 319 3e-64 212 +AK1BA_HUMAN sp|P78736|XYL1_PACTA 41.75 309 156 7 4 297 5 304 3e-64 211 +AK1BA_HUMAN sp|Q12458|YPR1_YEAST 41.95 298 149 8 5 294 12 293 5e-64 211 +AK1BA_HUMAN sp|A0QV10|Y2408_MYCS2 40.07 297 144 5 1 297 1 263 1e-63 209 +AK1BA_HUMAN sp|Q9M338|AKRCB_ARATH 41.46 287 146 4 4 284 7 277 1e-63 209 +AK1BA_HUMAN sp|P28475|S6PD_MALDO 37.70 313 164 6 1 298 1 297 3e-63 209 +AK1BA_HUMAN sp|Q9P430|XYL1_SCHSH 40.38 312 168 5 6 301 10 319 5e-62 206 +AK1BA_HUMAN sp|A1CRI1|XYL1_ASPCL 40.52 306 163 4 5 297 6 305 8e-62 206 +AK1BA_HUMAN sp|Q4WJT9|XYL1_ASPFU 40.20 306 164 6 5 297 6 305 1e-61 205 +AK1BA_HUMAN sp|B0XNR0|XYL1_ASPFC 40.20 306 164 6 5 297 6 305 1e-61 205 +AK1BA_HUMAN sp|Q3ZFI7|GAR1_HYPJE 39.80 299 160 9 1 295 2 284 2e-61 204 +AK1BA_HUMAN sp|Q9P8R5|XYL1_ASPNG 39.40 302 172 4 5 297 6 305 2e-61 204 +AK1BA_HUMAN sp|A2Q8B5|XYL1_ASPNC 39.40 302 172 4 5 297 6 305 2e-61 204 +AK1BA_HUMAN sp|Q2UKD0|XYL1_ASPOR 40.20 306 164 4 5 297 6 305 4e-61 203 +AK1BA_HUMAN sp|B8N195|XYL1_ASPFN 40.20 306 164 4 5 297 6 305 4e-61 203 +AK1BA_HUMAN sp|C5FFQ7|XYL1_ARTOC 39.94 308 174 3 2 300 10 315 9e-61 202 +AK1BA_HUMAN 
sp|O74237|XYL1_CANTE 39.62 313 171 5 5 301 8 318 1e-60 202 +AK1BA_HUMAN sp|P31867|XYL1_PICST 40.26 308 162 6 5 297 4 304 2e-60 202 +AK1BA_HUMAN sp|Q01213|DTDH_MUCMU 39.93 298 168 4 9 297 11 306 2e-60 201 +AK1BA_HUMAN sp|Q8X195|XYL1_CANBO 39.87 311 165 7 4 296 5 311 2e-59 199 +AK1BA_HUMAN sp|Q0GYU4|GLD2_HYPJE 39.31 290 163 7 7 289 8 291 2e-59 199 +AK1BA_HUMAN sp|P23901|ALDR_HORVU 40.00 290 151 7 7 292 18 288 2e-59 199 +AK1BA_HUMAN sp|Q876L8|XYL1_HYPJE 39.34 305 170 6 5 297 6 307 6e-59 198 +AK1BA_HUMAN sp|O42888|YBN4_SCHPO 38.89 288 165 5 4 289 14 292 3e-58 196 +AK1BA_HUMAN sp|Q0CUL0|XYL1_ASPTN 39.16 309 173 6 1 297 1 306 5e-58 195 +AK1BA_HUMAN sp|Q46857|DKGA_ECOLI 35.93 295 155 6 3 297 5 265 7e-58 193 +AK1BA_HUMAN sp|G4N708|XYL1_MAGO7 39.34 305 170 6 5 297 6 307 2e-57 194 +AK1BA_HUMAN sp|O34678|YTBE_BACSU 41.10 292 139 5 7 297 11 270 4e-57 192 +AK1BA_HUMAN sp|Q8XBT6|DKGA_ECO57 35.59 295 156 6 3 297 5 265 4e-57 191 +AK1BA_HUMAN sp|Q8ZI40|DKGA_YERPE 35.84 293 154 6 1 293 3 261 2e-56 190 +AK1BA_HUMAN sp|Q8SSK6|ALDR_ENCCU 37.88 293 170 5 6 297 7 288 2e-56 191 +AK1BA_HUMAN sp|P38115|ARA1_YEAST 36.81 307 166 8 4 296 24 316 2e-56 192 +AK1BA_HUMAN sp|G4MZI3|PRD1_MAGO7 37.05 305 171 6 3 289 4 305 3e-56 191 +AK1BA_HUMAN sp|P26690|6DCS_SOYBN 38.28 303 160 5 3 295 11 296 3e-56 190 +AK1BA_HUMAN sp|O32210|GR_BACSU 40.82 294 137 5 5 297 9 266 5e-56 189 +AK1BA_HUMAN sp|Q9SQ64|COR2_PAPSO 38.05 297 156 6 5 289 9 289 6e-56 190 +AK1BA_HUMAN sp|A1UEC6|Y1985_MYCSK 37.50 296 151 4 2 297 3 264 5e-55 186 +AK1BA_HUMAN sp|A3PXT0|Y1919_MYCSJ 37.80 291 147 4 2 292 3 259 6e-55 186 +AK1BA_HUMAN sp|O14088|YER5_SCHPO 33.66 303 164 5 1 299 2 271 1e-54 185 +AK1BA_HUMAN sp|O49133|GALUR_FRAAN 37.77 278 161 6 13 286 19 288 2e-54 186 +AK1BA_HUMAN sp|Q9SQ67|COR14_PAPSO 36.91 298 160 7 4 289 8 289 3e-53 182 +AK1BA_HUMAN sp|Q9SQ69|COR12_PAPSO 37.46 299 157 7 4 289 8 289 5e-53 182 +AK1BA_HUMAN sp|Q8ZM06|DKGA_SALTY 37.63 295 150 6 3 297 5 265 6e-52 178 +AK1BA_HUMAN sp|P58744|DKGA_SALTI 37.63 295 150 6 3 297 5 
265 8e-52 177 +AK1BA_HUMAN sp|Q0GYU5|GLD1_HYPJE 40.14 294 157 7 5 289 7 290 9e-52 179 +AK1BA_HUMAN sp|P47137|YJ66_YEAST 34.97 286 155 4 4 286 5 262 3e-51 176 +AK1BA_HUMAN sp|Q02198|MORA_PSEPU 36.33 289 157 6 4 289 7 271 1e-50 175 +AK1BA_HUMAN sp|A1T726|Y2161_MYCVP 34.35 294 157 5 5 297 10 268 6e-50 173 +AK1BA_HUMAN sp|Q7G765|NADO2_ORYSJ 34.35 294 175 7 1 285 3 287 1e-48 171 +AK1BA_HUMAN sp|Q7G764|NADO1_ORYSJ 33.89 298 179 7 1 289 1 289 1e-48 171 +AK1BA_HUMAN sp|A4TE41|Y4205_MYCGI 33.45 293 161 4 5 297 10 268 4e-48 168 +AK1BA_HUMAN sp|A1UEC5|Y1984_MYCSK 32.42 293 164 5 5 297 14 272 8e-48 167 +AK1BA_HUMAN sp|Q1BAN7|Y1938_MYCSS 32.42 293 164 5 5 297 14 272 8e-48 167 +AK1BA_HUMAN sp|A3PXS9|Y1918_MYCSJ 32.42 293 164 5 5 297 14 272 8e-48 167 +AK1BA_HUMAN sp|Q9C1X5|YKW2_SCHPO 34.11 299 162 7 2 298 8 273 8e-47 165 +AK1BA_HUMAN sp|A0QV09|Y2407_MYCS2 31.97 294 164 5 5 297 14 272 2e-45 161 +AK1BA_HUMAN sp|Q9SQ68|COR13_PAPSO 36.58 298 161 7 4 289 8 289 1e-44 160 +AK1BA_HUMAN sp|Q9SQ70|COR11_PAPSO 36.54 301 157 8 4 289 8 289 9e-44 158 +AK1BA_HUMAN sp|Q09632|YOF5_CAEEL 35.67 314 166 10 7 314 7 290 1e-43 158 +AK1BA_HUMAN sp|E7C196|MER_ERYCB 37.67 300 152 6 5 285 8 291 1e-43 158 +AK1BA_HUMAN sp|B9VRJ2|COR15_PAPSO 36.75 302 155 8 4 289 8 289 2e-43 157 +AK1BA_HUMAN sp|A5U6Y1|Y2999_MYCTA 33.78 296 156 8 5 297 13 271 4e-43 155 +AK1BA_HUMAN sp|P9WQA5|Y2971_MYCTU 33.78 296 156 8 5 297 13 271 4e-43 155 +AK1BA_HUMAN sp|P9WQA4|Y2971_MYCTO 33.78 296 156 8 5 297 13 271 4e-43 155 +AK1BA_HUMAN sp|Q7TXI6|Y2996_MYCBO 33.78 296 156 8 5 297 13 271 4e-43 155 +AK1BA_HUMAN sp|A1KMW6|Y2993_MYCBP 33.78 296 156 8 5 297 13 271 4e-43 155 +AK1BA_HUMAN sp|P06632|DKGA_CORSC 32.40 287 162 5 7 293 8 262 3e-42 153 +AK1BA_HUMAN sp|A0QL30|Y4483_MYCA1 34.47 293 158 5 5 297 17 275 4e-39 144 +AK1BA_HUMAN sp|Q76L36|CPRC2_CANPA 32.89 301 165 10 3 289 10 287 2e-38 143 +AK1BA_HUMAN sp|Q73SC5|Y4149_MYCPA 33.79 293 160 5 5 297 17 275 3e-37 140 +AK1BA_HUMAN sp|Q8ZH36|DKGB_YERPE 31.14 289 161 7 12 297 2 255 5e-37 138 
+AK1BA_HUMAN sp|Q73VK6|Y3007_MYCPA 30.27 294 169 6 5 297 12 270 3e-36 137 +AK1BA_HUMAN sp|A0QJ99|Y3816_MYCA1 30.27 294 169 6 5 297 15 273 4e-36 137 +AK1BA_HUMAN sp|A0PQ11|Y1987_MYCUA 29.25 294 172 6 5 297 13 271 5e-36 136 +AK1BA_HUMAN sp|B2HIJ9|Y1744_MYCMM 29.25 294 172 6 5 297 12 270 5e-36 136 +AK1BA_HUMAN sp|P15339|DKGB_CORSS 32.26 279 156 5 17 295 19 264 3e-34 131 +AK1BA_HUMAN sp|Q8X7Z7|DKGB_ECO57 30.88 285 165 6 13 297 3 255 5e-34 130 +AK1BA_HUMAN sp|Q8ZRM7|DKGB_SALTY 30.53 285 166 6 13 297 3 255 6e-34 130 +AK1BA_HUMAN sp|Q8Z988|DKGB_SALTI 30.18 285 167 6 13 297 3 255 1e-33 129 +AK1BA_HUMAN sp|P30863|DKGB_ECOLI 30.18 285 167 6 13 297 3 255 3e-33 128 +AK1BA_HUMAN sp|O69462|Y1669_MYCLE 28.27 283 167 6 5 286 13 260 2e-30 121 +AK1BA_HUMAN sp|B8ZS00|Y1669_MYCLB 28.27 283 167 6 5 286 13 260 2e-30 121 +AK1BA_HUMAN sp|Q5T2L2|AKCL1_HUMAN 49.57 117 56 1 5 118 11 127 3e-30 116 +AK1BA_HUMAN sp|O13848|I3ACR_SCHPO 31.60 288 159 9 7 286 6 263 2e-29 118 +AK1BA_HUMAN sp|P76234|YEAE_ECOLI 30.30 297 163 8 4 289 5 268 3e-29 117 +AK1BA_HUMAN sp|Q76L37|CPRC1_CANPA 27.18 309 167 9 6 289 9 284 1e-28 116 +AK1BA_HUMAN sp|Q07551|KAR_YEAST 29.04 303 173 10 4 289 7 284 1e-25 108 +AK1BA_HUMAN sp|Q9USV2|YHH5_SCHPO 30.20 255 142 8 35 286 33 254 2e-23 101 +AK1BA_HUMAN sp|P46905|YCCK_BACSU 25.08 299 154 10 29 289 39 305 1e-17 85.1 +AK1BA_HUMAN sp|Q94A68|Y1669_ARATH 24.08 299 176 9 25 292 84 362 7e-15 77.8 +AK1BA_HUMAN sp|P82810|MORA_RABIT 31.18 170 45 5 117 286 27 124 9e-13 68.2 +AK1BA_HUMAN sp|P46336|IOLS_BACSU 25.42 295 159 10 29 289 38 305 3e-12 69.7 +AK1BA_HUMAN sp|P80874|GS69_BACSU 29.36 218 107 9 16 213 16 206 3e-11 67.0 +AK1BA_HUMAN sp|Q56Y42|PLR1_ARATH 23.00 313 178 10 16 285 50 342 6e-09 60.1 +AK1BA_HUMAN sp|P25906|YDBC_ECOLI 23.75 299 181 11 11 294 19 285 6e-09 59.7 +AK1BA_HUMAN sp|C6TBN2|AKR1_SOYBN 25.32 316 178 13 9 290 19 310 6e-08 57.0 +AK1BA_HUMAN sp|P49261|CROB_LEPLU 45.90 61 20 1 95 155 15 62 1e-06 50.1 diff --git a/notebooks/images/pandas_dataframe.png 
b/notebooks/images/pandas_dataframe.png new file mode 100644 index 0000000000000000000000000000000000000000..2d917747ff871a4cdf3c96a334208dde6d7acbc4 GIT binary patch literal 34313 zcmeFZby!y0yEgg)(jAi0D2l{OcbB5l(jwj64GJhos|eDF2#821jWlAA0@5uANJ=Bf zd8TXqVt?O0`#bwO=lr+vS{J_a9dpbv$9Uqt@8@wvYpSaf6VMZ&P$=T7SCq6-D9mjX z3S$}%2d+$p>*JzOXB`4`4ZO6?eOcT*Ty5-~tXaJL-K<%x{p@W}D8I3iG(!&#@rO#M z?hnK<_ZGwqloxS4kNpWq1?aBZ=->2xq@dEbOhW6VbrY*~_4L=Y<?s8Yt0p4+$!W$t zPfABDwlhVCWNx4Q_7Rzy-v4pt$8G)T*XiD-uF|D<hBuG%7QYV#n}kYlZmmdcd`Y>p z5_DqOaGKZgeQ4MfOL}jFC1xk&9gaikox7pGXHBtoYz;-KjT(8)SHJJ>9?#>1#D;Vn zx$L=y(lk_4Y@gZib*r(*Wj?l)>q`*K-M+A8eEK!tZTd$w>4GpV$>B$hy_lPefx0r& z!7*z4*)joyY9D$T?-6eFbY+*yn))t}?+%A8)*XEAG7ovtHomL=CHQtvXRmm1n&;k| zIwnEh?o#Do?ybi8_jf+G1#@rTI=I@vw_m??i+EHL|FB|!q_M!UXMTVFneprT`Muyw z7YmD>aT?Qi*=N*-9WFK=(!F9;(3)O9pfEDXr6{@Thoyf~z_Z>nzV0<}^MYx-<U31r zfiC)JclzM>yxjMT3z?4u>zf)|c}qE(iImlwYGcA`wv$wsB@X6){Ysgmk20%sdCgrm zzD30UFmU0?&x;dsxGvWlWGWXw1rUeDq)M8{5-K&@&^-LyL%k8>c~DM~-6J=2e+ie> z?b2anvdq0ieD&S5blV5@`$ylhKbukCxx#v1!Om!@oeR_Hn?`i<xs^-LQ?)-8Jx?=Q z5&xE|(^G48=Qdw%`$IL}{J}43rMd6Z9A4+^=@_qdSEIyUJTbPfto$nZ$VX*eYO*$W ze^!;&{dtlqo%h>*&Sk;*>w>)k#ZR607W55f+CLS)svBDwch<J=%=DSE9-j1(eLjZI zndSBKQbCr_k?mlHf9uis{&Ygxuc)-9q%Uy`f(}wfpE}9Z-5yd2vMuJZ&nc6$<i4}8 zDURu9h*al)b0hVMl->nwTXE%By!UHC66=CKO>FzrqLlNYRHCQk;(2{1qHMNQb+St9 z`!y$ImFKITilj2ICg)~2U@iCxd^Rxt-Rq7s9P|MD8UdkfAg^(cRMp)mTv5^!!}mhV ztIoYIq|7=~g$Sxs=s%nH2{@BZGpoCp-=wcEkrKRDFjQe~8fsH&f77gcOo>1!Yax!W z)8AwQGwg%Jwbenk-)EZmtvF?GU3x0?YACISrmsdnqT$)?;7s4ZFLWWqC%dUuYr-!V z1m>3y#P`X+T(=pepR9TiwXM{Wsrlo=!pi7V`A$sUm5W+l<2|O{d*<?%3zHn2j%xko zZznt1Bx8)p6b$U{C4R~ZmyiGO5%*o|!N<&u_myv*zwbPFxYgFH{VVFi_eJMS`^Yaw zQQmjVurGWZ?@?bSVv8dDzF8Jd+?`MEhSfa$Q(rVy-z21rCH{cmoA5V&MU}yn@y7Ok zzWGL?1omwF##r7sAJtRy)zTQDrV9tuG))8^R!$rGV~*IZ)eb)FXinZ7G;WUEfdv-R zvzEgirf+(M;<~Ej$x2^0Ohd{(oK$tqZqq$F7KwN?C)1YSku9SYJLvtOai6|T%4M+b z!i>^&5Aly3U&op9#BRFaO3LB968&htT}JpoV}faWf~&OeLp~3#kKwM)%=9e_nPAKp z7&ook3vJn+XqJ>{Q^v1)aX7~ZVtL1+4JR;5)y600lO-=y^=py{8t$}CJ>JhK{wCsZ 
zW?W8<Ea0Av4vV!lEB3KYY&R)ycQhCM0ox+i@bDxhulRY4kHX<;C?BI>GS*Kby;axH zmrz22J{Srz2Fv>&8-19EUq9Z$dvpH{{Q=?T#=VPm<{jd$<U+Ex4*6EKPjc4I4lD*2 zJfqNl^}OF`PuGS0qNn1awBBtZzR_^ani9%h5fS%}aL4l&6F5DY2DLAk87<#^Swjuh z?>rP0zxv>l$hQ7XT@8{ncFd@duQIuBmOjOMqsPyT=Ha!ZML*{bQB}fw{<vDs9KZj^ z0j}AXN6RdU`%Q)vO>Get6CNi5ZN|@|$4mpfi63G$S-K7hc3n#5h<ej<rl@?gH5*s# zM)hewIyvyY;Q+>=G=0Ul`=77J{va)xUB^Rd&zJ2OZe5{zuZNd9_{vkUON3eS(ilId zsBn{c57}{)pdN=3L$f7soN$V_84GS-Z~nI|&RMn4Iob#(?z``nhR0sYJbUF^7O749 zj^kowR!z}}yt=Et{z!db7)lTSo4}J%9m4h1!cE~PibvK-B#uvA$if<pFMSkk!S4zC z%80KN#QFQhYr_#7K8dAUZCoYu7K09g+68YZCiL@P#CXt^W6Y}?*&LJ#Cw$V>PfWSr zW_ib)W}>le#Z6twY$vgM*?Ez2vpr3auaD~)%e^)YISeM2;)z5v7D=u14>xX#Z~yS- z#6cIH*{ZztMephPM-3NqaC)#p4f-zoC-re&TF3lW`$e?O(_%WTP?Ij00^`&qB`vG% zgdJywHMdaJ%24>amx_GtkO1qux?HTc;|eof;TGZdI&H3}*a!`JE<Y99IG6No>y>cB z_{<#9)5s~SsW5L7yw7Jvmp-DL*Ra2ala6#d#Zhd1jt`3sQ`2<5-1;g9tzBL~Rn~D! z$SZ{Vr6&eHUe)^qX55|Iw-!Viww|rb8;=c9Y+$bLNjI#9by_~PIxkQl!It?viiCVi zaWz=Vkd!xRvz-20_KUBKNrkhSEM%NtmZqbqxMmz4t@7-gzGfBW;)s>aR%RC@ecMN@ zv7^R57B)BfAzRD?*YU){oAQ8Z(~jiiRUC0ML%_LP81Z?6UwB{cvvyQn>Qid07{rOv zS|p~HH`Q6RI&;f8PNvWI!PWai@6~Bri2eBQJm*E(J;4-2y_EbYc-|VD65Ma|$H`gU zGgr5jFHn4IH96y=e*dU<YnxWca6^}QTkkTBN!-yTE%HqN8A&U5rU=HAb*-1<_-Bj- zuXH?=;ZOG%5{vg^W@RhLX{XCPlEc~J*LqwOrnpj>z~x;XbLmRBM3<gTbU=D)T!Zoi z+coMHSz(*=ZnnIwA}dToB@@dssT;9ahVtyV>Mu0VLT4ZG8adl2C=<OW?7I6!nyFc3 zS(rm$O;gkLZJ%s2<t?;Nt>N}_8TlhJqK=f4*WuG%m)JED@?)Qv1?c-1=GON{WF`sS zAPbTZ&|gmaK&M8M($7HqIEljj`^-&-E=nU0&4Mb4LiaNPxwmr9S|;yYu2GPSbdm0( zo4_Vg|3t-XD9`f&$4_l_e$3|M$AG(MclDHlrjmsF$#?EgH=0qc>%DELy<S-~{UAyX zM`i4ufn#n_1>4uMW>&UqQHg_oX5o8wUS+PtE(R0CcfIGiS2~T?r5-(|SnsDPa*vP6 z_m*IH<Nd}O#rgW$+g87r3Gv5xa}f%w-Ev`tp3nTQik&fFOlod5(X?T$&T`@{ki5fn za*;miSqxqR)-anKXOKnW(Zs02%F6SL&5Dv0_Bu-uiI*QWW05jMD~e*{CuzN7KY5{; zaW9MT=|Upih2o6f2b5;v9=A9n8Z2l&k{tBD3{u9Vu}@VRN?{&0`<cANVSBrJ_SZSh z&r4<ASAE@OX*1E3M~Vw;VqGlq7)eto>4>+*w*HC43oVbIA5pG*6y7bykbA{*OK?a* zTgr}0cjUt&zvd3f<1e;%6yq!FzfZfKMzWW)<i9++oHRlsapot5PdSe0^PYO%QEd%O 
z4w{T(k1M|x5~~xlri0Aih90fQPiK%{Qck76_>}Q|skHIqhEl4Yx+xCJH*~XFs@J-O zJ!aM3Njk8`$X(ZHOIWfWyd)4tm&G|V>T{cwqOdbPo0W?8YqPn0%YCM^i~TMiHil({ z!^)clynGx>njD<(+L6^8-DaJNz`q==x+e7V@ax@MbF-^q6@nfslVpmiQK@0ggZviw zt@~887*U0dtm5%|FGLb2sP8K5<;FMrItS;hUd#{Cb-Z?1&u|bUvd&`qn2eoeYuNau zr&KhCBWJtFMNw@fohPX|Vba6_Q9Wl^G_SO%&D|1dF<Pi#RZ^^F>8DUwS#5YGbP#!7 z;^m>2<D=*39dF@VvnNNro+nj-9Mg>wQhkZa$5m211oqjE;hF}_?V)UiPbsvDN67OF z>^(lBOrF-61PpFpeXF&eH(ABbC3}lebd&gM<isowj>~r4?;kIzst7t{e112YN_?V2 zvAKKyu)7rS!HB5aQK{ih&*kW_{$q{Yhewn}^T`X(m21WCnFSg$hTdwV>3;LXV`v6z zP4@7UjEa>N-8V9=C&s^Yl>=*uRTw@ibp@y4&3*ND>HnO4&$Fpx<=%?Qw{@ikhQpD< z#sOmc6CPHQ^e^YzQp`k_tBgNA4-bC%`sLZl^*YQ%nPvTGy?x*3UHa!OLlOnXGJc(; z#V0>c@!}ndDPXzcDzLIUl~GS=A+0}$|LfDd!ceotW8$M1($de<nz3}xvPk%!Q@Wku zlQHwAjF*9fhcT%1Zbk6Z**+<s3mRDGDDg1eyc(`!yEW9z+iN9lF?!YYrn_QA6r<K2 z)xMEtVDcp;jBB=}9_78nWY_EC7Wn<1&pF%F5DYa5)|3WKxfz#h&(~rlZ&sYuNqbpD zc3ftv=&MVn*7Q101^-YWRwWSUa%ufm)cWwGERk8I@Z5byuQVYiuIMuwV&*>453a3y zx@K2$dO5!wJG93=%QUQTp~d4&1CMiDT<>xvWiT#!Y~3(3wzHV@6?=!>*CvxM*gwwE zKeB4}OuUqdPm$Dv#e16oO;54$Wo7Os=Pr&}iBG$2Y=)3^%SKVV7+T?1%j1;ui_TL& z>zy}V$SEvMVx>3dVc1UleV18@srdeghU)I4Ji7}FarJ&T$MZjUP&B_T)jDG0*dEq; zIq!M4*`8hFV>I<m8av_u=GEHACqqmDzrJ1wewBH`@}|{5;$?7>v+k`n5n1=whst%Q zbl=nX;;O|`&V~!%#ig<Zi6@CXHq7Zxe#tE1Czn66X{M1chw@OfXQ;vC%6KbmM||zZ zees7?C3^VoS#HJR&%<{fo4gkDb=g-qv-Ir#FV5i86T%W|S2q9i9a_3r&oZ$~S6812 zGkcOLuc^;1p{kdgAAT9p75h~3w6i;o&Fg-CWVC^z#4P67;91rq7O8r#S8@xb#o`#_ zg_Oh%;c9J^=~;THxgR56aM%i1j0(w}IM*W3ik)On)EW4XWj62hneL|!-yqh`XKg3( zxc6kfyX@PGcwfm%8HS~b_eDb3FQ<ENrpoe#=ZECIJm|nA$|0D+d(vWYujN6FkKqxA zoJr`~`Fq18RB<&`xvw}9l7DrVrqfGfsl6PQ3#I<xfT<W+X7-k7?R@YJLRS3<>d2br zzS%Q5<Sh+-syDJpQ-xlMeXc)NZNbOy+WCGgdaCQib^7tJo|Qy9*7n0W7pB>qi5PY< zGQppo_MFKw7<PLXBgeC{{SGyh9)BaMJLe|q_cCfTm-YLj!Z(Rq@^#mxX}5;lb5!3x zoa!~y`|z@~TGnn-p)MwSDE86v%q4{~?03;Jk^Fa9SRVhDs6<`esej{1z;pc$_Cn21 zw=5lHlZPTwFSe^0T<)IKZB!JFVaZ7yvtT_^eZ*ErQ{w-6V9jEaeL*cvh0mGCyZl>t zQAVxj$%5`xg<g~V{Us?g%H<tLj%T75u9>b(zJFoTawSK;Bfai^Xyxt;?gSdvp`k`% 
z7M(Qp6vHm;S=u?jru^j{p1iVECp6~I8xy1q+#h=mf}hCV_<6JFR_?vMgGDEA8_f1F zGs5t+3O?CBZzZdl+=_d+-|Xsw21wFe-EO;oBS&X8h?uInpe@zD2%?-PYg`>9pI%58 ziW{r0C1beSDlFICkeg}KCRRwkBPI~dds&@Jty{y5WXm#qpf*~FS<M?&L47OYhke{E zo>iiT|B%|Cynh7s$n)NZk3Z6&uvGh6rZ$f*hlehA?^%D(s;CRx=PI?`pE9Uycdg&? z-S}Od`OCQcWPRFRHvQ{^dp|x0>XvY{txbkhx_EVlR4Vh^V=*;&j;Tsokz$|GxpgqI z(@7ON`da5J6_3!30;OJkCY?u=d)+zgiNCsj;1^?6_sry-$eCOaUPI{wYwFXi7kS<F zSw){+Q5fm*Ax)QaK3lvpn6RK7a-vvKDeItgda~x@Utg#${TVy(;oc>W`!~qfg72)_ z=Li$biAYvo-{2h)9(>qh^2w~&=tyFrv}M}q04@K^*PZ@>$^_5NDZRV+S>ag;{Fh9A zrpP?_gxQ2?Wd9>}JBukav+Fo+w6T0V$MZm}-DJ;OtnzxF4TD6Bq5j27bwsVE5w!Yv zF^-MKhjlrO$G3aXV|PNW)V^Gx8||Q&_q!1MVlYBpdsw1$vUf?h_a)t+NpO+)cImeX zSuM}SjW?tE!uy9zow@Qf57yXdW$uJlRJ~^S@th-o%SyGE`aF%x<Q=_h^=I=7tMdma zQqK8qeNiWSNO0Bnc38aPNpQVeVVq4t+#Tl9@r?UaJFl{xY?N$;go-tsd*ADw{}klh zqhO;iCzbf)^Gm<0ztS!BBi>yb(<J4Tl>6GkR_Ku_7BZ3limdhE`T8%#*{%J&t+$hy zrg!cceVgmQ9?0Y*Dx`JOcB1si^eE#Z2X1ec{QKZq5(O5V?Cv+-UP@s)m`qev1FvoE z^MC%}om9*aco_9#;KGXca^kxu98YOK|Kctc?@H;+zO?@HLj<Sq<#kEdspy^2TarmV zj^ms(52UXtzBUdO8ZtR@xOvj}w*K2j1z(Wga8OMOJuQc?dfz7<@5tyKK*&o2J9Gub zsFG^gH6_iH(;3bE1lu~Rro)_j<V97-1LX6!->Iik`kzZq$ba$7Q!?nj!0n4xBVTIy z-yW{HFS_V(?u>oQ*}f+6-H>(Q$@jIsdxIi85z|Xf8kvhClK8aVRGu1_i`K;+%3x!E zRCq4oR?p0hQII~*pKR$iRl5==BQ{hmQu#4aiSF6eoFrq~ShN#U>WIGj4ddL}^UmJw zLdF&H*}BTQ<sz8_i$glu=|Xr1wN?b&LVRUOnYno!K1#gY!95@Igh-At3&lrU+ax}Z z>D7m1si!SguY1deDAk@5vbeGzPWb6uJN>>oW3@V|P`rsigc|der&@_MZO@KX&qSKU zSMP1F^5@;~DJnMXZPhZ8a4);qgECV1?6$P5oSj?2UNE{q<*Js+tzu}+`kcypm5$>k zr9`J;=7)o;C6;}5c>CwIuk2QHpqX7mK7EztDDHXZ)>`%8;mt?aSDzo>(V{-SGPS_8 zOA*ra+h=)OrNYgxqGRU4;tIYaZ(Tl*zyEl+<M))NT|Jtq(?fr3siSJ<;LtC}zY-c{ z?SG%BPG<|9{?6ObNDJMy`!x7FHihGR?0)&XRZ8A8LhY|<r?{bIg3eaF5z!#QM%XJV zYF<@T{D<laT5CpdlJu1}xpN)2bz-!+Nc70uKWJuhM?Da8IQu}8b&#M$BGu#w`v(0( zDjL<6%9)u?i=mbdLw#I*d9q`ayQinNR}#f(3YAg2pW8%&?>>|71}ay}JZ(sr#IDZy z#Up(FbmO!2x5*V~v7%Tm2!2j|^qeZFb66TP=v~Y<_o-)Sys5A&KMM;*uRz<|Xwj$w zQRnb&8TZG@R>=n$eqGNADP%ovZ5_mU_FLbvRq4JSkR_(ZO)Y7#Y^En5Ja=g>_2u$o 
zJ}YTGaaZ@RT*6=LI77`jN=vytGZ)bh4caL<qBJ!X>CTQs4ZM70!FuBtKJ#hfkCYGj ziN!=J@z}b#@;)asp_}!_#c1)_^U4%*OC9bqDq-ulatwVlHY_{svrsLzGit8Ciiyn@ zAMFGf$1q|9k2iOC8|&<aeP%S<{Fr!SH2JPjqymPYY@X{UjFW0PzwRF;l?em5EQ+`C zUBbc@&E8%a{UNilzP!3NEm-n91SJ#T@z7ga;W7$E8e<PCxWSF<5|*ycyyjM}7S_Ce z&TgQRqfk<^es1QLj@Di*7S^`*F4Alp^-XLn_Eyqt`ocHRH{2Ah?d-1vcv$NMsOwq= zI9iHZvB}C1Ncl;?1kTo8<}7~BPA;Aje$s4z=9PfIBbWKuSpE$0a+GE>xS`3S=;~q3 zBE&1ii{?@Gv-c5TlObS{^02az&{k6UdkXj@&1UE2<tD+$=j-ds>nq6X>S4>rFD@?5 zhZf)y5a59kJf8k8Ugmy0E}rbj6o1X3WbJ9`VejT;@9M&W%xP}n>g^@X#s=TB{5?5* zYpLu97yo_$^7+s7o?cdbSK)&YyblcE6F{TIc+dho0^)rCem{JB<HkSlcJcgs7hym7 z{LJ0>_<7NM&d&eg4o@#-pMO2?KfS|K7m5*l+SZ<~-X50L%0AXEUhMz=P&X%U&wqcW zx2H97>Cd{ItZevTQGcHN@As%)y`lNfdys9kwRd*=a|bf|-$z<m{&Sq0w};c8F;<p* z)=t*Wun<p}ng2hG_p-P7*8%;9_d&k--xdOo`{(@sVe~)t>(5&L?5l*5tED&c(5p() zY{+^gtXwVatt9^XE5Et0kfpf*kGPnKD36eswKb2pkcc^tHQGv$pI=ziN&qeTZ&O`$ z@$@oxv9v~}g2{R9VIFgF0bz4X5dj{wn57Vp5E_nG+?wBp$6C}%R7k*5Ou)+0^53S= z@URC@Fn9X*RUuPZ!Bl8VD=~8`5iuT1w50%#khOpqkGOz^D366T8f|SUDrzIj|JPK= zIY`KBUX^AO;6?wL{Ld9lCvz_wR}W`tw(IsT-hTi3Lf78eTF1*A*))C;0knu1zW_g4 zNK{lrnEzjc^sGHR!N`$0`O&-r0)M`-vXoGP8O_1i?48YRt@+$sZ2x?L9E$|J3~a19 z;u|pYpX=~05{e$y=3cHIx~{HH(rn1eSdc0IxD<=j-_9a&-PQ8XjeiDPTOt1Tx5p`% z+w%SSD8=_*4gTL|(y?>(b@_ib&%cKLa~63IFJD&=M-2}R3kPdUum5_U|7`F-XVQjP z=jr9)fA#<HQvb_1slUwi3f$}J;s0;X*Rgj0>(gHW$;tlDR<W@B2?7%4mVa%(r@4={ z)t@7Pef;YyOFMHHTWg56f4ka0uG{}7W-MUIZzU#TBgSJbWX{hcBxG*RBgQW(%42OU zE@&<;j25;L`RjoHIlHH;jhC;vhqb&d*Z`a}xYnO@X1Vys`Y!#)wfNdugN<B93rL{R zY=0duixeLcQ2+6IQb@?TaYN$YH$VyrNfK8P%hU08b91t{_V|w*^AAJ$zZv&$_y28_ z|EJskeb`@jE4sS*gVWl1Y52PQmyiEX2L3g}b$d%|7f;v!YU%$z<gab{x4Z*u{_7g# zGf0no|ICknn+4)I|3CixZEpWRJ^`lwpNIUn_V<6b>wmWEzqJGZt-=3eUH`LP|E(SP zZw>w*>-zs}y9oZpu&rGH==lPFCZayG2;3XT;+m=wY7_ZWOX9E&SMc4g7<!^mgp|mC z7^t)iTDXYob@hfa?gBQ!Sz;!?>&|EtiUoC5NnY1)Y;`iwMAy4jcC&Y)mWag{L!L}i z^YzrvCof+3eY`j@DEy`(sA}?)w`WX=P_gYe(}1n$K;>Jf?pJGC+i91$Ui<1Q$zdxi zoxw1tHLK|kUfjR``H+9&j4<QwcF4P+cT%>zT|vDWo-*G98)=e;94dP<C2739y;<0h 
zpI-&KT;&0~4p%U8UcV-}tpD1oh>RS5CUHZcQw|H&&P{*}d-jX6O$xbA{`arR8`{)& zf+?ew$fA|F@sJ@7cZ9c<kRPJIezl`D{{7>B=jz|L{x389mskJs)c+Tc@y&j$ET?1@ z`XDAI>FLwx2M-=lCq#Exe6SMuu;3vkA<^31O)MiLv$?f(N4qHE3P&AZqvh4B_yU50 zx!Og21scrl@82tMy^~eH7VwVg>C>k*ZnLe<O)2;9<62l+lGD<DV3r~0@1i7^#;bVq zrrc>%AVVg&uEUZd`V2uNrn<Vio4fm+>}+NQ1%<w1eLf{+WmW+J3V2}R6%L;hMvY8~ zNDZ08GoYjRo$L<7Nw`l(9M;uII8D@$JAQptySD!zK3+vzn*vVg&tr(XYy|1)>HGTn z!X7*zFsb#x?u=&)SICtsHGPM67`({A!NGFj0!~j)kIyeg0zyJ96O%4tH6>)5cg(hb z{#3BDyErj1VOD8mE<~R@u$<Fxr)g?R5~qUo&NSiGEADTLi)?~|lza`Fm@2WfcX)5r zjZE(M+X-Fd;!<#SUJG{@NLC}Kr+=_MS?_yP+0xPiJ2#t<5k<Qg9mS+OJ3E^y>C5}< z*)y|pgPeh7O@00QKZZ-$`S?h`_hzfw*j#{ffTy}`&d3iA4x&)+;~5E6RaNVcWR#Va z;Td;_9D-S@67urcFQ{ik(Fxjk9aX|cGBGh7CsC5O9iLcyd43hfVWK!LT$l?dl`+dD z)2h$N%!H$e+T7fvPB7%AHhC9tHgIpXWwARwDLEM%UMx12$Z4bu<M;31v}iNA`@iG+ z?cgY|U*D>~2fH{pG}K}z)ap>Fz?-62q}w_^&T!|>oz0&=SKR84;V`UhZ03G0d`5-E z(D2R8b;PvJ!^?E;Plu95D)cV~?yQbd^7He%d3db2)!J7YnKXqko<D!S@a0Q|;9%+N z7bun3$f*dhO>W)luXNNugM$M$xAsJmG^WGCFEFsWF4#r?#tmYyIQNB5XHlXKKP@dS zXBQVQNEK97QNy;>_^i<j(GwHlV0GEN*{n%OAO+K#{r-J8x5?1hIA(6n0(l~R9F9>5 zwaptki*nld`1qwS`Gm(OC*)*geYr18V3rSET}sy0=j(jd9*m3_`1$$e6c*y8@ZP$s zl6U9&1w}J6mVmuge0_a=l&A##ZP>xX!^1v1A+nf`&D~vPu&$Z;`GMKBp<~}$w{Me% zx1h=mh;VUnYietA9fx1z;o*_}7J)cYS@it*naIdURM@9aYSeh-lhow~v5ARfQm4n; zo7>xM-?KyGcXqr|yKT-%`Vhe>$-`3WyPaR@T~@nxE$PXVdyhCZ=2l1F)CBBuab3Qg zJLG_YA|WB!+}TmOas_X0ZVv4<f^P5Z^exn@t*D@25OHdTlbBwazjW!+m8)0n_oYq! 
zX0gxU;`(0fO22-Aii&FHU~B%$wQFsWgfwnzW5T-n`fX-tISz`J(NQ|^L_AVb)uxaW zWZIqe$?^3gI<U-{PUg^d-|1jn6s@SsxrYxQlF`uMz8Q^$SUNL(`dc*qocP`C@4b1i zUJ(%!6QiGAZw(}(7s9)G^{OcGHk<<upDER+wJ-VCYC=vV92_0N*;td+BGreJl9Fh} z+*t0BFh1z()BIpnAu_#xe7J2;=XFUyKmcv^fvn+hF<p}@`Cz!|cVydr()pv^ADS1b zqT=E@Ttl`QV(A2Nw2P>Uii$?w+Ec1$h~25E5V*oY5fK?V`;jxdeLzDtc);dHT+@$% z0u92ZA9wF!>fXF5Up%D1mLvzZJv>Y!9k{22ys+P<8DhF>`44evEK)hyp4E4L{D_~Y zn()5TaahyDB=*Y}ZE%g*_Gn64aSsky^4OwdsY&hKI0jLRffqMwf(`|^sXxa(iqIvf zH6bY;dT{R^&fwRt&vJ55_eq)G-@(E~2ktpZZM9Qw?(MDjDXUuEXdH4l`x^%prNkE1 zv8ECHrQgWl(Z`6cuIsOEH!(FG|0MkM=~F}}{rw%G^lq>|@S)a@7@DMq5AU+8#?LII z+<w0~+qTBl7Z?zLyb6R_u=wHEx7uLIWR#TQ85!q-w!1|~8iOTuxrUriGLMb|&WU@R z`SG=+Es0$<!p100F_(*qAoreezBcRS%Y?u7*4k>^=QrnK_}F-PUw8LMJ=ZSUOojNM zu1?aEB^~2DUe#hH&|*=Jq*Ac4K8H$(40o?zXnSz>vfII?IZAY04ua0}f&#=EAmpbC zJL;w1_yB?0!_%|ef7`}syowmSsdW+?iUj%E_%R)r5O2X8JiqstPd5hB3fYm_bfw<u zWC|o&UtgChBSZ4>q_kQ-1-nYDoPz_G&*})qj7o`}o*p$0G5w&!n^Xu}PGfKJ!S$34 z4e8n~`0w7mOGZbBFDolsQ&ZCnu`I8DW@V`O!^e+z)bbTnRB(0zq%$%yHV1VyD#w*n zigYcO`f?LLFQCFcr|`{pJrz_ii+%7wAt)%QUQvgHNvcM%@+U0%U@_fw6O1Z1Hx~l} zF$DamsHm{&YEhWWOyH@ep<(p36kb+QQJTuiN-?(?6udS>I({(Zdb-l-{(f?BF*k2- z<(oIpf`e-r89ji2g#=>;Q5PkMdsYH;Qc_Yg%gdLX2qCJJ=n-mY^!hmVFev_fd9xVV zV2Bm)N-R=R=giH`A;yQ9@ZL8qRfX5~@bJ*o)I?Pp#qk(d$#GNX!BX%D2_3f;UT;~H zG&VB9d0ph@R)Rh5e`Blh_{kGCX=z5TWCGjXtPaRvk!YEq12?aYDMm0M#ee{bK?h+- zP?0f%RcRs%3k$QwrH!pmc#PpK@oP=u!C{qDRGt}EyTA)YjykD83i<i-XT<bwWALvW z?IK93{4NtU>cT(REK-{~>BrCM>FKeFiP28hdH>jS<hxZD0nVDcW*x{`Szq$<C4s-c zKiHDOl`B_bIvFJUu(7dOFJ3Iv*(lK~PRh>aJJ;^&^Ru|PSW#Xc<7IL2?A+X4h@@2- z9Cd99ii+fC&qfCS+5^{q22;Vkg+)d7l`pmii*ybSzh_6%ufpKtMbV}f9S!NooE#R` zGY*wT-@bjDxz8NZT;)6tc|V?zhSzL=eG;OJGT^%O?2m12Sb{cPWD-#>U|k&@9SW(T ztL2@Ej~{!D3(o-r!X-J^3~vkg0~M`w7cxm#mNWyzjC%<Q31nQt^u+1_X}o`YWj%i$ zdv|wN%zf_6!$*(E>FDhBGqo8wv5Q~6gp4C^Z+~fTZTvofh&(VXR#w)f@oGK`3kxzT zDvrl_dd13M8IaY-r3;FN99E~^F+jjWr+2$~d9_0T*lH!@GiwgRPzc40>A3szr^oZ> z&&^<yA<8Q%Dq`(zOefUTh{3tO8;Ze6y26pc3}FR%-NM3)UD>kC;kcwKrlw3XCp$V$ 
z(~XRPn@8RSihlDD$04E(^WR=%m5`uEQhY&n$b;<A-*kG#XZ2s3v?O2FHzMA4sH6uA z=s-3Q%z~4XljxinnNERvhh>5^4!GZ}F94s!M~}k6Ce6mHoRLHZp?!1Gcbb)j1(hK6 z6OWXu9qqdD<;$1Ntxo3o4@vAI&P2h%!Th#8=ep8GONYWAspPdl^4OYBkR9v#vb3}m zwKd;4-y8Z{5Ds2)*AM`frnWXF%5NcAzs%|*MSAD(E5mTvlVy!o4|n%+x4tY<SEjEe z#<(cItqz(k@Zq_&>S<~Ru`mX93+~&E=U@lkKNYmyERa3Hx3{-%{nhdQy~V<(<fsP^ zDh}&Ue(kHm{-9=J_!{Mcf~2NGemS3$^3$Y^dj<jLU_1LXcJz&H^?_+;0&^!Wb5Iy$ zOzH$;S~O~-VVguH{@h1q_I}10ST;CW8^md_&N8Eti<d6RLxzk&mkumP%yQph=+;z; zqmP7jAk*?pRl80%LaIYjCKB)<p#UJwkPfVxM2A*CPDx?EeEC`pdTVpD`T5mHn>zy< z)rIEXo}O(>{dtgIwRCkO0O#3N8d+Ie&%*BcoSmDPoQ!m~6?mGMlynE?qN1T;2XG9T zu`>dnQsO55kgkghZ$(7~oEQov9kA2<rdKAWKTmaBD3l_mV`;4N@&%2o*s(DqDr#y1 zUjk&LcG1kn#<WYSe(?~*qGotDzg-^-kO}Y~#3qp#2+pddsTl_Thrn?W5o*9>!ACpt zr@v3OHbPGsz~D5Cbe%<bB!Z8G_Ety3A(}^ZSOBV--yOc)2^Mag(aU)592qln0$@}H zZtD4Q*Cu6vZ|Yf6(QrEK0s;?{)xn5*MO-GLhD`&BRr1O^EUm4r4-V!N5EQl1bUL*d z_9P|c{>Mk0^JA5cJf;Oh4qFh(&3m(?4_1mx6B85NhMV+5LS&dikKF-6PTHL0UrRyz z|Fmj2{r$_J!LQtH#^o`PruKHF=PzDVjpFR>ED!iWdeypp+x5+F04AwmX3w=R+4N*w zdNth?D*NI_x{YV<V1wS~Q`@W-c<L6U8~0L<IxKoe4?+Th2vojav9VOIl>n=d5EUT3 zdwRu@x?CNFT6soi4qvpkwUIM2#=Q&NS11_3ry<ZQ9^y@V(ca$P>?ERCFwinR-88u< z&X$DN-`@|Giexnu-gIZy#bmW~lBaRWXn;2D6SbcF_FtZl*A?gLa`6MmIXFC&FBo9u z<t1@<cX!^ibaQi)^YRjche0;tGC@Ci@}!D>F`bZ*P%&F3IHG<eE)T;$AmCYDozZ~{ zP_kCrM74ZCF!$=~C0ABfB0{@|zJ8s9FbiA|GmODg=z4}2x#E2i=jtMe+RPWOr-p&8 zK}bRn!}9X-hwg4=IC+0f)_eYeUbC&(4O{OS6cY8hl2Zk&2_RV}Wn^%l!-FunHKc#r ze0`!;!2lLgcO#LMkg%=!4i<2@SeU=Xq4F8zG`@E`6fm~GKqI^UXc96t9?EZ}sGzUj z*EK`Jn;>w1{j5wVFh*b<FRnkmn<X8{CN6F$CSEw?5VSw(n<^U`QoQlbrkfr(@bz>N zVlaC-NPS?2>{5+EhaTzZ?8lEe&KzpxshEF#We5qXwbt`{8GHlzv(=*9tmQ5?0*c|M z#+G980#JtZF{Ki{%du($QtdaM3Sa}H<$#UK8U6t&>&trJ=LHWxfB%-Dp|cX1>UN*Z z1)h!(U-LyM$6`<B<=8gYN+Z{!;D&~VnT3V(+{1%|c-^6^k&hld>c+kbc|b~9Iw>_Z zDk36c+$D>OfgxsT$xcW}2-#hMr(HHnpCxa(U|?VX1t~N9zPjqPEq#1^Y!ur#up<r$ zE8tyYN1*854?8<M6bjNen~Y4O;`b*XK3oB2KL@cFO-lqPWSz<hr?WR|n~jf)35=A3 zlasJIdfNXxBjUoqh@0bHoG)$oNdVjW`>;2ZO;+}{U-t0rP)6{yM$N#o(9@8v3~`+$ 
zzn!VT4fD@WczjQO0SjckbP2yX&h!KbqKHlbpGgf_=<z%=2CC`wWP_ZEiTB(wfS9r1 z4PnQxBZJRmPj&{7020Pf?dUB55#Su~bGPL$=hJ(I+-BrZ&3Oqj4^)}_&uqNgXM%JI z0!6ln<3@C$Clx{DjcLYA_2D^nlkKpnry_IR)6-_?W;67}Hkx#z(An3^Yjsn}&^b7z zr_6x6@bzn#U6-|P@SK{ux+=*uje&kM`i{;{%Wof&e4X0QmK#jWku;aybOGwk6XNPo zYqG>LIfiY&{T<*~dif5+SsI%0j31a-SirnTa(fp5oB+nCsjo*+FhDGLcc4XUK0Oxz z3*iF21DlOpfKZaYm*r7=jydFjmDjW(3i31rPQ2=b(1TgrS^swdyA}99QRsjjJAkCP z#d;-}sB;otcrY7d-L78&M9JfI-zLf@=PB?5_SbpN&3yBy?Vbm@1CN-v1CmlC{gWz~ z@U0z*pwFrSYnO3}9uU4f$S;5$B6PXVDg;;(&md77;d^W2H1$4fb6Hn*E}z3nV@eq# zkSW3b0sys8mkV;5T<LI|ozNH&J3fk2J;SbluxBdu`UPEG-JF^lS^!fu+h!u^#zEPy zUcDMWG6rJ<+M{yw=FNiJzn1b6<{-Jd?$6~N?mhmz5EUJLc=PDel9$Be<mBA9Z*^1c zirpz*z7X64j5wz2<gKzTNn8@Fii>-nx^nqOwq>P}ai)BYzwr_1C-V>l7#joKA$H6L zl@2-N>J>u>4jqs@3ksR<=<bhUXOg;Z$siFXGv?qOt#ogkaE#H>Q4!Xv%CX1p^Y8J^ zryMHhAb9%<cL8&%D(trlc(w%iAST>u0P9wu&Z}yX9fR$(-1nQjDW4H%>rNLXq`{vK z-p@oIUR6>$1OGNC0rGT}_;Wh)NuxyPfP&)6w8<A-TwICC$&qj03L!c7Q}Sg<Ya$p2 zA?70KTH4xof#^Ta&u5jCWB^4#vv>%6Fs#MQj(pKIrNykd+_KYcb);OqNsleb>|k@2 zvGEaV!fuY@+_@lCP7wic%idm9BP04R0|V|`bIOps2`RaSd(H%L0&Vu$m;(}dh2tUh z_u&_e_T$=+l00+$cb2q(i1{A8p^H;VzhED?(TUa|+V)Y&>wjB#ehG30LHeVx(o)yr zUdP79azzc>w>}liOZb(Q8v{EzBV+hEd5ptGfzE+Cb%55Xz>M#cFgjG?d~1y)JXn3x zi*}uoLXzXe<>>XW#cl_q;sqx!&=lY`uw;K9u=ixk^6juYWGqer=+Q13T%^^~(ZQ-T znp~N|*q8Y5{o6OQ`S)?aPV3QW8uPKG;9QW6!j#y`4`rOa$*T0b$BU<<r#~0z6qK2_ z;jB(K-7Xw;kx+bT!dor=l_5^0aA?JN<#t~Kbz|jre~Hfvm#HU>CBNCKk3zr_e4hD_ zIG3mfsha8=8;`GSr<FDt;=Za<S1Vz3lMKjd$m|?H>3^$TKRhKrJUPiYUhUcmSO)=_ z7LcDr*60}cOn&cGH?hjdFd;E~|N67>!3IH#?ViXRm+7lqr{nn=4>13TJD>})fRL%B zrG-aC6p^0JfViRi{7AXwNUdkVB=*d+XKV<iM>Yk}5GzG=A822Y$#e8eO<Oq(AaF0% zR9beDtWMN!KRJDE!mH}!#LdReE_pJBfrT^j_UuMf&klYK9e}_$w!IcZFK>=iyE20Y z25<wRx{wCS%FqV~2hATpeq3a`Qle)GLa12Cam}p_LRc2~TkV3##>vs{n_{{<(X@-F z2o5EPRHzw~3H==u^BqwIF6+IL^YasfKoVDw6+m#S&g)Kfb+x9W`&@gp-gMKU-wHsH zbuT>E5^0A4`;Qx<0N$g}na2yCQ<Bos9^9(;nFT$II_pi~zRNxTXOcM$4d;M1wpLVE z`xdu#+gMqdfxe)SxDP-PL<Y3uSMK(Xj!HV(8y6_GwY68d5QPc}Z=ig{xQPl~yolE; 
zwM~>2yk`h{P9^;kgw3DZ+pd?pBqby)et!Gtw>zZ2@yYj{p<r?k$Rk(O)!)yy-8V<m zvLsz013m~YYS9{jzq!BfTG_e?^j|D+@6}}F;rsa7I4p_SbK*bauU}EIDs3nIL8E_> z3WSHmvIfuZ>QraXj!J$1QQD;9;US>++9bMqI>Z=n%<_FKd6wr=AFKI?&KGwAfgiL2 z%sm4_;b?B}&|92S5JN>0mUjcvSy?u4>wP!gBl!8?z%QM@6o?qV=i+r>`YkpVCBKSp z@g}`<u(OjhH_t^s%{1V?*VGgmQ$8Q30$5fc;uBHQl?V`w=92YITV`6q$4d_1C6nI{ zaR#<UM@QFMJ^@MsK0bb`pe+fMCB|Mot?b<J5JfmT<cU^BYgbp->i|7~yUM{&y07V4 z<wV+aq=TbW0NcGoT)BaMla(JofS!xJGAPGHR721h%bmwXX1Bg)1*+5D{ZTn=O6d}9 z#7*6mCQJx4<zV(c^CsW{bI6FTRyP23g8aL_X$!lWvUv&!j~kLXMA?ygUp@*-N>~4H zVS(r21QA6Em?8+^o5$O|^AL5Bd<#OB=yVYKnIM!xw>0@=9U{5_QdN*N*U-=awX)i7 zU0+8h98wQ!J5&SAfk}vF(m?ikR#EX~{k;Vra3^4PeOCvjr<t*Ei0t^Y{5_MClM7$H z3bG63>%Q+18ynjmhJocWR{PcTbBTZG-9_J`C?+A&-JBvg$CeCn&rKi~SSV0o%I*4} zQTGxO6Q6zWi0d-=me06~DD<F>8L}DJQdz+6ir(q_?9*f0SVqaJwru<Nv2-JK-o<4N z799kzeF_Fl{1y>t*1?k;-;F7QGBb=P+=gr>Ik8OAbwl5QeKW%`%$Q#FVt`6TVp^Jm z;Gl)BF4eQVJmbMK5$4^t$;n$wOCwpJFMj*}-M5BRL-nn2mzk~5sZ3*Iqce$ipKeIV z?_hl`56(%oTF};D)VS{Zk(l@H3k<e9&J&G*{}}_`sTYKISlkH@Z*$(|W3Mx2I=a1# zFm@0Ot4HO9Sy?e8eb=4%yAW}!-1;;14rJ1yVbCUs0S$EA#U_*pzEck69BOCQ1rKqA z;|3jXb?)DyYufISJQ$z;jRQ)Q9&^eSj#QaoiIAgZ&iYihdrKkKXxc<zSm6Emk7#OZ zY7o}PNT1Na&K^Zegp(oWjtQV&WOm%5+{>Y+b20SCuw6xe(nemt-JiYCGPP~`9R7lR zK@VX4;Gk~$)r!^p2~ddc0aPcz(gVesg@>oxX7#WNzy<>85%Cg|HAu(izP>q8Y4ct- zs42gBVgw-}hyy4bNX+CE6nB50`~oe8$+^*Gq|6*KOo$%tKfhf?a%Y|#2o+`Bdryn? zOQ~H#MP1tnP#|G<diF}+fBcvV816H;l~&k+3IKP1U!U2))#7DM&(fyf5--#<$1H;L zo<HYkIR5z=WyEai@$upL*2K(i+r?i$Kl4TQP8h{?N0GBT$>?v#`#_dp%L-f<N45eS z1WYsn3OY~%L%Yp1>wUfxCGi>*E07%y_QqWykP;LR5rCKkao-Gltt38aZEg+|XayvA zgAzSEA$s8cilxKjU8|!)OBs^BB$7U>a#~vC0FNWa4twrxExx(`<niMipj<j{`b36< z-fM!m3bK&7-KWL3Mm&gE52QKo^=tLinI1bK8C~b4^b}*uH;+M{s6IM|N(H1LN=}We z2tfDuI4e^`c3fOsmZqCzfh0oN5Whq(1`C%YM<ZLtZ!Vg1%t^*d;Hlg1WB*sT>Zt)h zDnK0t(Ie(Tpmx#9RRv;xAJi*E3^WdKPGFKABR=V_yqP}z7=%;c$X}n`YX_;l?H;k? 
zuCTee`I>rz*PFE3Js0_@Rl#auZ517Xr$orN+uM{{;~?A)I#i;c-L#}^0x1IUBO+Ww zv{iw68hi=jFD#Sadqo!nglf7uNRKm%i}zMm96*dmP#?lm!TFriXaUnATxL4-R2s^a zz_p2CqZ%5d(orK{8EY2d;GMIeCSwSv%$NAl3L(8vziP}=FHi&`21O{4YRM=lzBoii z0@H;DMQECW6$AbwVv-uM`62^y9AK<-vLWZDLw@lfRtXXY#7&4|6i^m{cnvD0|3w88 zO-(Ym-EDnB93_f#Tg-cf3c|Y6x)ER;Fjh;D?hcwxf5}35i4_!^0`*L&3?c#uAo%MS z26#xZ{0k8Do*?^M3~IWF42?+tzt*ZD*wRXPT?7gkv1p}|M?PG7Th+%$q{?aZE*v2s zdPGR*?@yA?LQ0lPduOSsEw&fC#R7INhA~t|K{cnau&^02y!-CTP4kN~MY<16-v!J@ z(wH`<@>`B&aI3K;vHYbah<PoMaZ?k<Gt0(<m(T;U3r|U*fh#YIS@==v|L6rMPD)RY z0U&{t4rv8#h;T@mV%Ns23C({)3Ie#qiD<j)>m;F}r#_0BP~?K0fJi|AbxUwn3xF_) zf&f;Oqj@pnshFdqBb27yPL8}GpHp})cG>SrB60<YXLuweh^`5V4kQI4TU%QqI)O91 zyu3hhTb-%_C;<ld`+fKwo)+)gc!&e}14LCZ|DPP7W+LIIwX+i!sB|V1;qMS6o`BLj z2h}sAl<Dry4||9S+cEVnP!S5H1p~QI>I0`yQd1)USKRz^jSsnkcsyv<ppAnz5fv7O z@(T!HV_}(jk(m(61M%JO=SNON(YPIQsBEWx6I59+AFxtf1b#t02Xz=kxp+p&d!Xja z!+Ale0>VP7m^&L3C;U)s_x#z|>ZAcjb_^Bkj|azJ;kXNK1vSyFF2P>2i5hpPeO{|C zjgF3%x3FMSQ&US!Nx_900dwdvIYc;RIXM&<HKGU`84&>0N4D{eO}7H<up97<b234+ zV5&&%<LE~zQhI`@YB~1S9@v9}s8bx1G(A#2SRN?I9sZ$ZU~n<kclrtJN%bhNF*mj4 zheXyZ*RLbB!}}oin=}T|!Hb^v*_a}RL=W^2tXnZ(y9G8H(ToLqrJ(T67a<O{MPN&a z$O#%T0(AH--!o>q4`o5=V+M$%D(neN5|@@nkE8^UqvhS)&=6#RV1z?H%YE?z2O<e_ zy8I!>e$a(L55;&f&qbtR01?E&%K<H$K`MuAb|n4F6mqEF;X=yn%uE;rN5ZBp7=iRe zNJ_RjiR3jln$l7mvL)q!)=euDBnI)kR%FBBPZL7FGkLTUga^=Sp->4yLZ#Ye645Gw z(Xj~#R4596gwhoyr#kdgv_fNwI|vw{J|Sg!5P}gN4oceG{5&F@LK&Zpi;Doz72?WB zx>^~$gj6i^wUJkvnwk=ImgJ#`Mtd!R{H)?MT7eA)cJcCM#ap)+;TYR3%KKhVK_wTc z6jCF0a&k)dr4M7sWCm%31d2A`^hg~H1P>Lc=s~Q6!qv>$S{ymM3Z8JOyu3UV^it90 zn6Ga)#Up_VR3)gELky6+e!UAz7FPwN!o{wqSfZk$xugaV1$#2Yv5IsFXlQ5-j*cvV zx6;anNP&)xO-f1%=$@6IpIjo71Xxnvve$>tpB4T6#b3UBxxVa+D9Xd7%t#0&C%*%E zdhRd6Tv$?~?Ci_~SpTmsf!X=_aFC9XdNObt#PMCm$Wx=tv!bGm&XSRZ;Zt6CE4~<L zIdYRLSwZq=R1}^UAcTQ#d2kPO@R1i17vbeq5Tt$geoPD@f-O>0X+iuNdz;H7<%fQ( zb{@%z05$<(z=5+%NQ}LR3oxet#kC8LXxo!PzD@*K=1RV{n7BB;ybRbR>=Y<$HTDdP zo11P8M=Qm&B2Kgb-H|#36z!swFyUXc0FY&e*a*c!1x^#hwU}j@;0U1#6oGzI0;Q}? 
z>2z)29b#@d=$IM+1;F@1E2qPYi!U>QaA6+tq^zurgt&fG3o5%vrQ~jScv4nYJg9o0 z$iRxgKtW1m<>n^Z+S&r@Np+-yoC0_!d{Bb&D5M-Hw1erJ4-9|m?InevOnE^wHYUcA ze>JbENfr?@V8;Mj02zH-ds|6Wl~8bKYV}z;ga(9NP*6}HSqUJgVu>D7oPy#H$V-To z!$}|{D=aK5aB4BTP$yXu1nQ&o>Erh{@3kvR$3(vpA#GM#I;D+AT#cu{1LFn%bW3?P zA?0=W`TN(O2==d!|GN5Y5eRMw2w~<<w#fK8@ty~AKCI&6bRa{tLJ5c;bTQB)-%5UF zhX;|D+hnbV=@oZC^ua(uu0t5uGvye!Zy(4FxT&fC^nA5Rjb4V6fVvVXl-XMA06`$t z*VB{f(>T~xY6@f`7*xz89WYZ;J=J=R@7)<NW%FRD(aD@1dm?-(KR=)QA}r|Hg%Nn< zC!iV74{;yDh9wjez|K)&fF$QZBtcT|g&S#S^0hy<BIEBfIWB%&2c<cdL6ij1{O%Dm z;FKS{?;Ug!neWS0G<a)A4#FTv!UhH)+d-R>Rf36$2^yGV!>{%FsMuJlua1>Qh}(m6 zzK5V|sQLG}QfO}hA>DOmzv&cFd?R*&E8Y&?J^O_B)&ppJpr(QsgPy6;VyOBdlR@08 zyODkf%*ztc>%n)~(`cke1sNtSG=lMD{W2IG6x^FZZQJ_H*E9paLBfQ>6;j0n6^h@c zixwCWH9_QEY!z4qsnFN1f#8q8OretT{?|rR0_01AA3$k+*2W^CD}@>9%^GsBHvemn zlcb?q+<LjR0k{V>i<p>!L218TS;$Gi@d96H3$&*g{PW$Q%(wN`*wq}Ewwtdn%~yyv z^T4g!WPfjw0st(}fhzpcomP?Uoo49OLmCL68gm~&259|sAVi|v0L59^+FC+<LNX^L z3PUqkve&d_x0JLS6y1?B1*yydO7R}iohuxeZkAt)AXJ+5KT{roLJnspoE~DQfeQd7 z4d2+zX8KOaLl;oQE+8bN^+e`)Q_;%Gstj2P#HJ6X2(dy)<R;XXpf^L`pcEP@01#V@ zR#;!;=C+dmdlTM0h>mUnQ$YZ0y2v)vzz@DX(kOG8lmMaj&7T)*Y#}Mz(S*V+!bYIs zAp%iaKow|xt^;-8D;gS|AT%MT0pWm}7UmrlhLcbPfP=5|;yN-pIRnGxSE95;2)v@^ zF^T}5r7nS#xS^GB?-L~7rjs>Seh`ArU}NL|fU-64tNTbNUTtmdA4><Ptk6d~WuTH` z)0;&M;om&i+?*BB$7YeTD1>#w2k`2v1mj(lC#v($b)Xo3_ChA;mZ&gjXvvTbWkQSz z3XL=0zO`ergEs`mLpNnNEE=g8F$Zm8z`Jo=m#`Liij)tyjo#ux!vY{OZf@9oWCf~# ziCO?9COv)}0dki)6p5vgieG}v5uTx;;lEZ2(HSYsZQVZIy$zv?`fEyZvN?!m2fwCI z;{hK0F_=Hc1T}qgIAkb2Jpg9_r<w!(?>$&gZol0h`|}zu!{XMm^`R}ILVAI6<ij_& zdu0)4g_R-B;Im4ro-M<3UE=(sj|PZQ?G#&dOZD@(`S|?Nf54F0BqWRsN{4>{ARs~X z(hM(Ts{nAXJ}oWf3)!!qHlJ$nN1DdI>fgL+4yAj83M+ix+l^g!;mx1j#!eq4o5X;F z^@EC)`|*K0l<9BTN#2H{DwHS@sOxvMTrj(^(9z0P;R&s8pwtGu%ZfBFILF91yS7FQ z{z8qz-}MGBY8_FQSQYQ(+-?d%^fHK*P%OqkA%(e5pIV^+gy1FU1g&U}TGa)JfcP#1 zNyN0GGWltV09CX@3ltEUUQDRqQDgLH=4)O=+kfF8Vw9jlL{zA-nnOP&oTBfBDZ+g; zq_)l=fi7C96_dR}5NR%fiJ?~FoYtEZT;sJ&0YWhnECjkJAwr!504CUko`#w7PNp{C 
zhY?lVP$K@*0S1M7MEC>jS+^Vry%5JhGm$jaZsFt2dQ^#h=8PG9jpSm&wjL;2S;E7( z@t&QUaj_gFW8>mlLbdJ>%!3A-SA0A?9e_E?ky_s4$8Ucz0k~6!+MkHiNHgYn*!W+9 zU2q;PZEcDN)vyosNBhA+K@m;wps)3JK%ov2V?bS*2brbHUN9Sa9GW0>fz*HnY1g=6 z9p)B;iZ*Hn(SfU+83-T2siVrC68BF|&OX;hVkd-~7QpsyAk;%439=NT!-5n5JupZu zJ@RUSi0hP-py>%v^VyY^2hg-NwX+co{bEXN)KXfULhGVzNm1QnHNl0C05N?xFi+(S z*u0di)|JYSl_0%G3%AEYGQvW6NiZU;25F%yD#*I4%jN8yaRjYvtpFAw(mCx%a3-sb zetfb-&0E)D!4Dx(^-Z}V5O0d;i1AK!kevV=A!!t+E$AAMC`7d{J4a3Zp6ANGF!Xse z?2S2o=;?V6=Y<dn6v!OyQ1NC>u4;@iD$zsKZ$QBil3fJ5K3fUo-pgLSX4!pqBV)wZ zK=&dQH<{#wOdO5zgv?$6{|CV?!LI7x=J{XL6S$#`2sDRiXgo!rnTW{Vi|Pf0(j_G! zEfF&_W(eHV3EhU>zR1gqgMyF5(3f?buT_sI+51z0q>y*opuZ-vYWu>YqDd@73EsLd zdp!H1sT^bhyO%Ew`h1!Vi<D@5eP`pNKUyhx-#okWW5qzsr@4D_(xw(baN5h4KKsgb zlECbYoS2!$S2spfh_=@N%0dfqVOfLMvUbDHmum_rQb1u$l0cuK+ieSKpZG2GHCmKM z(7t#U0;*Na{_wIFG!yVcgtZmio(5Mc_gemv9?}Rg8A;2)QDVD~AV&x7^sCZ>uvoSZ z&9?BDGoT(ugb#uR^yR6NqQaozN<je=@-PMp`YDhe9B7m{EDn}~er_TL5h8#ioxnEA zLr;IFH>#8lTY_GVh?7CV!Q(+wyALT$KJ?hIj^D|dF%uJ)B+Seq#=#;eHB~m3#~|z^ zQ4*>jxLKq(uizs>zw*OSY(F)n+Basc%`*LjaeQQFnd#eo+})U9U`$9$jq_b8H%qbR z6w?5F*y}fju0r%kK6_>QuC9Y>O7`?v5c-!#)_L+6C4E@<`OB3Pal?>qzfX@Z9sK^a z5$O#O8A*Fiyd1v|I%NqyX9w>+CHsak0}`!mV(1kyF)^=~%gVrUp77m<?(DW!$R8G| zp=d<nKuYq)uJ0m2q!<_KEjQpsh^li|lgEPEIcfNx>aI_{ll;Gm`x0=f+x1<Vi>OG7 zC>aWg2pJminUXnV%#bNVWtOQ#gJepSqzK8B%=7d~LXvq3nQ4hkWjyy!-`-c}eEaNc zpa1z^=V)E4*0R=bc;DxJhWokiw-fWj2Nu7rEFbZu+<gPtkzlzS5HrH_g6g~`hp+9O z472;qi`5UG!!MEn11<*bR4{UP6mUC*#{zO%Cjf51FA#|xkuH_Zqf<*v*J9=gR?y^7 z+qWjPsB35#{ro;q7Rdy+?J6wxI;IUB<*yyt)CfpqUKR~;y+JZer%OF;E=GwuDLSV# zSyntgcIoCtPft&LN-4a@3={(-X08#nOhrGwP&BR#(Iuf|LEf_#pCKy&#u*eCNCed3 z8P}xq>2KqtmqDQs1$pY`{Qy6@!#4m#A!{Z>A80ce^S#!Ix8A|jeBchjH!39(Cgi>W zk@|OT-)7oDZB^@LWE8!v|2jiq_eMnDRl31SU0vO-G6AxJ@9((;`5ok0cYq~UTw5Fp zV#cB;nn4nQ8_7^hqS>RbMU?urm%6&T(kz=epR3t`*!U(QV`Txv8#MGk7>r6rkE~P4 zCK%%c%|`ex+4k-Al3%VTKce2e*&;0ks7BdJ#C-*2EiLn@g4Whnx>Wyw0Na^wC~r`8 zP*kHv{t>nEGm4Hp5sj?vaIK(Gup}{{3fI3zItKby=b!@-Vvv{A8jMJ?0u2Oh6#l0m 
zd5&<#o}#OX&fF0*vmv^%_QzuJDQ$t5{bLja3^+onv;{oTIuhiMjE#$9Id&|ww(er4 zw#xbQ7Bqw87f{@4r5X90^&dr6@WEhMk{#tNkQ9V|q9g{WgSki77c{7w4kD2$U315g z4nR;q6rOaPh}huh$7l9@Zr-$CKK7Got_95#X;3hl9BXR5J<;Nu2huZccIiZw4@?2X zXB`25q6PqG><&EsRM>nV(mpWGt?hEE-~soy3N6LsT-r-OQP!izCz4r;2Sm!bT-qeY z9VLE^4&9Bk^DXUxA^yA8px8tUn#>E3O&_>Uq{eQ8bknWqwP`1%CM1;FdM@ZJqAMd5 zIeeFi2ku!u_Dxe&^qxyIT%elQo8SD<M&vU+h7#)tpR;b=x=q#HZc|%dJ+3)uCKv&I z#~VR|l~&_`%`4rAZvgJBnW97-I8nw<qCK2tt`r5V4<VAG8rfh)hDF28fxgOsKEh&! zb~tDmCwqX|1D(c8jff|!`S9Y}iy-`zt@!!*5eY3E$R4yQD*LNKIwJk2vWu6JmF@IZ zRIYPGx$QtZLkiKFjz{-40cV3}fnHO30^2cqN~kVUJ}#esu6zMbk8mtRyS3LP%I*7R z)aW@u7_i&#Hbk^7R)W%c7qrVhz<Ojhl$wvg^$}y*wv7yUJGh}4&eq`j4c0{+Y5a^8 z`@vNBy)>9>rJ$c3=3bde_54Fa<KgQ?f;I@M#u_qibdLDVK4ffO+DOf0m1)OA+?4ne zE&guja|c3=P<;c)lJ=`4l6bhnlFG{X+LmIt5v?q%-35h(I%4cWvb-1H)1reeY5pC2 z06sTBSY>t~`cI&^+f<F=s{~4RfO}|g@Jr#`=RRM!8kXplD8p~@)MJB#rVX8c68TDT zNQhcQz_Iy0=9SxF1|H`(wbl}SGz^EVygX4mgOh-&zz@rJEty8D{#Jk%C5|=o(*NvM z9JiCE*ZEvgv3|9^#wmwtC-CW09`5}7d`ZsCE=DjxkE51gy;s)IY%T^nLAh!>2LP%z zDdp@Psc1pwOO41*H6)$wARhr9VU<>tsXaG{%whu-<85>|vlhev0!ExS)#(H32D*%| zGjTJP;><`?i5~)zCVmL42S0zkq#vw$x|?#t;+VmT1R!t<Z~SU_f2mvy(>9bm!p;-> z2(%P{;Xd@duI4^jFG(0NnQ`sn+GhYT(869%2JGx>dnN@OX4xGY4x||8PZ4)OIc1Ux zK#Umb)q*s>-h2WyQ<^gmxwo^wCN2&|;2TLqE9bG~Y$8W3sjWS$^;aTpL~75pQX(*p zX)nWwpBrxj?>i76b^{$?6fGNZVxM~Rbz}r!i6-t}pCmKHb;<IC8AHiK<OSf|?n^Se z=KAYgc}mcO^u&hgm!U{*eY!dhhEK<3{`&F-qlllk)^#+{5N77A#lI40_|I?FrHPBL zt|_sup<mw|Uak<F`g3-gxYM~q%{3ujq!D6(=?6znRQ`IKa>_o+_}3v8-U?1NrWhoO z7q(gXO0gJ{6DR4~&|IT-tFvl$8t4L`gIzpZh?&6*jo%Qhf9io$4Cs6X^~uH63>q;y zKA)t~(O>{&7C^WSWJEoHs*YO$_ZO}6r{I)*wN0$}60}LykH1)bF4zrG5{`G_IswEu z?6@>IlI4&g?(XX9>c=ij=eM3fG|=>*2>$D=p%Iz!D<Jf0yX&DC)JWD|AE{U=!HT2f z)YlS{XQz?!)wY1EN6gL>y@!8&FZ1o}Gy%W>0B(5R0#+>@xXH%7#DMYDgor~45c$el z2?Y|kCt8<@5!+i*Z#Aq5!seBUo>Hol-Ikd|FM?15p?O0wL3IT@N;JN-wmA~9*C<F< zi_T99DyT2BX)*4xhjMkP;8WgRJWBULJWibxsGN&czS4bbU_8{flXbEkUE?%k>TjP* z)Bs1WDy~+%OXP^{H@f{k<6C7oINmMut8<aP>%aQr=K_CV>zNDoSJKWq+Sy5(YtaMI 
zrWj%h;iA2K)p0Pb_CtX6*M+w@YG53Vl!JzY!!hI(-k@=xe!C}4#}t)WnpRK$*Um4j zvU46CWJ2>{IXQV{6-}W&Mlv07lTl@*<Xe3I$nSJQJrQ8-_|7Pe_alEzgZCHo=Lxxi zc5+c{ivfWAlZz8;pL#C4&<!4bI&n|>06IMs-Y;qP%rV5&Z}KiJS(8>yG8T-;b;r6j zeClBx!hW_4Ol5S{RnX-?UySHu#;Ov~c2d&UxYH}}!C+xA06eZ(Q|QBM3u5Hn9S?46 zAAO`Cq18AxgBpQqs`CU^sr@c!d_ZI^t*c9|Fy6a)K0?SA42qMOcCBz?vha~aG%ux` zUT5cNaZ97ev1ZMhcFK7a#wd@C_+(VAl^Psh6gSVZxQ5@iLk(5A*M>RI$8LK>8*fd> z-Vj;V!@e69jyXUKf$(5f>NG*t-YWKNlBkmQV{V0|B9HUQ$3hPV<Udeox6AFX-U{sA zQY^i*X=<=j_K-VEnS`fOx=P?<hVdKGY_g;oyokDL?VCQo)VCz5Sq3_ng7i94gL7I1 z$=Gv`w^%v;wB;huqb6z1{X~Vl7Zpfh*B;JD#f>oRvNvl%7KHvw;&!$cH;_ig(DM7U z@o~ubP~Umq5no4_kWn$)n^wELYFTr4<TLl|dXrwSY*1I0Q&~_@xV=7KNK8z;qv*%0 z<Uf4mD{w>`y}VO{q{9N5{16$bYf7r=)t04&EF~O5n|Dj-7HNH}E<42!9}Z_x2UyQK zS@m_j%?}WlA=c49Jo4Wh^uKo7|LWa;eD!~J@&Eqaf5w4-b-;feGyeSn|9_r*_&LkP z?H`lja0nHRF1xW|r!g`yRf~oEH|m@|-rj2|s;Ph&qu@6@&${C@d&rfA)@r%5wj$Q; ze$SN;?%SR|Ud{AD^b7j}73Rm?>Hs2?9s2c~wo-VvS(zGkj?p}kAm=a}a*XQh|LWFI zqU1F4j4R}HCoBEk3ZsL1y7aPN%{?vUdwa;7sHwFM)4@R@3)vS^?P8TaWgOwNumwgb z1X2PX8)amKg+<<+`fAmYky(gPZc^Z96o!?CUJY%n1Z!Jm5o@UYZaR|v!bGdI{n~E~ zJH$&cyg5qS>%IM(wl*F@YYPv^)UZN0M6czr?DB&z)Ui%%%_)Qjv2|~+p51Y0ADv%) z&d_ZyhK^ec!ChaK{1<}J*KD#JC^D8AVzyykIGF{MzKg^1lBKe~#)g>E5}5as2l79D z%$KD_nkuzJde_pD{`#v|jQ3i+-}}NWOY5Vu=Eh_!qO`RiYUiU+Uo^HfrFjPK#n}JD zxG{KnN}vTi>v_LZU}B$Zor}x0@`{SsCEnDgXw_s>!#4749=)EPe%6~VXzu6DyPCgB zJTL6{%%wIOA}lytE}pMmA?bF0RPV*I+Pc&H543lB_tE)zt*)2<6GQvgpZ$Mi`~QX0 z{HF`=PmNPrWMoXe#J&K(56XYxewo-+>Bzf?rZtqql0&2KVFaDJdi%qw(FcbtPBp;< z69$zbiYRVBDVBq@TWpv`>)i~3<zo?<Z~SCmpzrc=uPh}8$W@uO&`)4GCLAzB&}8gI zB44LN8wp&_+4)+zW!me&aX$3*lHrN<sSEX~5#`9>|IzkE2tY-+O-OX}r$R&CJu2tU zT};2<QR!jsAVSclp*D@Fe^ykq{C4?Il3m@ST2tcPnEF#KIg$1vHVmSoS$GcxJx+zV zh8|;AMj1GoBbw?41_6*d>_2dz?=7`7r?m>d#4L0VWl&&#kD}lXl3j<^Yg%~n$ZZjV zK1XP)$iSBJUvFfF-U`vaM~hvzz&QwS;I5D<;2FvSUg-SL|6DGf9Y36tFlMvz!#I-d z%p3o`_o(R^BBoB$iyrGA4$MP=UX{TABE+J>BBJW!f@^!CqHT`Fc>CuUn<Ga|g4r8R zg#-sD&^$RMAt7r;L-}J}96g2M&x7Y>+k=;PU081{x%u0|7m{aD)NY=@tSnOZC;f%C 
zGnbC2(3gn~0#&HL_N@@2Zo&0y$j>Khloi}5UsT^HvUBh-9R=y)m)Xab9O2cit>H## z;l8(S4cLnux?KfU-$6P*3lkHm(hGq%!UX_Kc<LkT`h+IO=Bb-5HJRYOh<|@~V7@8U zgDtE&G}6?VURp5l!Z@JIHmfk|0cWukDV8Aj&Vj2{HrZT}ie{2026GBibWP6gJ$_tM z(Zk|CTLYsK2rPM2jKa@QqzxA>jQ6$j)#oA;8is1E2_@4E`0SQQ7&8K@<^}uS3q(A8 zNLM)1tV8R|OP&6ho>_CaY&~1*I+>ci0WFC>6%|+WyIQ)~1&=?9T=2^saLnzWbWLxF zQXR=)Kd`+br8DeCZfHb=C~4)6ltjI(6i6jsYWQxI^ToIicht0GU*`>QF>-U$HB*dg zW{n(v!j;!Y_0z40;nq(rh4NnBilf&H7Tt2io?3+~#W&oQirK@kH&&SaGD)0$(pfhq zCWcZs+>1j<^Zunc@%rpXskX+WJ&e;j53pYc*}j^!|I71D7vlZjp1pXf@y6euw+)ix z{a??WoD6JuQ9`1<Bh~o%bIRw>`pF<hUO#n8F<uSgZ*?c9>>H)eFq5A~GLkeB*If5% zPSFz`E5f=1_MZv~+9hXaEnKcf;yI6E11Ps%B_%!R{uVX+7{rN}g@t<hshT((8Z<=- z-T8@$XGkOxR2W;<t$FqG<?3DZbg=F5AFUPYZbVBl8WZ<2H}`y%iRuaf#SHY5&_b}l zASS5G^MswLt*uS>{TURr-5na2_F~Us5CEcqNFJi|t1?Ybc==KmT%+c-)MC$|=TinT z=#3VSlKgSEAFt5|h+SNCJ0>L5Z69uHVNnl7n!27|GD`0<F^oZx`<5+RGA{Q@w2>Uw zd!NqC&K?8&KVNOI%u>#)uBsXh#5uOX0*`^Ap}JNX)*UKm&iE0i=O|DP+_k!~aSRqO zZ9rvHcXui<V=1)an|@9W&2-w;T`YG3k6{Z1g#*_1R%+_*#&3)Pl{=|!?YtueA;t^v z%+(r$i5c_ic;9;&AeAhJ6En~V$Rc@s+y=K|0X2?xzGE`jB^T*^**Q4Qsj2ZBGo!K! 
zT?S75DketK>9b2bL0J@^C|L`5OO=~WO-AMx2sg>VaYg{&<b&cGZT8efn(Ed8hCzrZ z1dbk!R_A0(Y;rB{cCoit@4c)AGhV$ATcP6aBgNgRrKP8R$_=3h8G#<R#i@8f56#ri z6PZvpi8xO@yU!&4>JLA<5p*X$QF~0RJsznj7hKs_kA}hnSyp>AaNWU26~PpY7q58W zFc-_+Sb{!IOuUY+f4S&U49kK*JrR?Lg`F53Edh~C@QcgBNqE31?A*CC;q~i}v<@c6 z7%)wpjyBmkc;GsNr-5LhkvdYm!9{KlBO|12@dA8&&!M4zm6F24(4U%xZY<%Xxanr5 zr(b4gYX-;pCt(*2bNHc81NC$0YK#)t6oDs?U>A-+L;8lYg33PNl|f=m)P_trVYi}S z#96_P0XY_!G0K&q5idr{4Ld0}u*rc2$a&;QltVu33iDIdH;F)Xh<1xyFzi4bl&Xl@ zI8Zpg7W^zKI^mACfCvaA?d|Q;pkrG=M4}yjgqvF(=Yn>sMn&ecJWiL>g%@wGJCA&d zMUNtHq~(4<z$IW=3^myGBxQl$?L-tF?Ck8g)BSR9I!K({+{AjLBPJ86Q#=${2O|{| zQ&W}DW!26wR}ri^qF>M|Fg3s?O{{$0TULS*Fm=<U^_~ZO4Nc;Ub;Zj(3zs^qvMf@A zik@2XAFE~G<Nqo?UIr7Khpy*2P0je_WsjNJS<XiaoQzbL?Cl%Xg^G8q9&O0*vrV>V zj0G2+A6veu8)v90XK3DdmRg-oa<c_jvqIN8(Vc@}S0oZP{NRB(pfpU5h8UA3sP41u zT_#86!tuZ8k;UnXcMY1l6zbz47uFIHAjOhy6INwH>o2|2Upl@}dfwH<!^lXYudi=( zd|X3GDZtXwG6M}g7*s+*fo@@8p{1>D%e!~Cx#|28G~NV-^An57B`zmN3%%lCo>VJ- zVPWU4BM)II>R7yI&z_w<794k2X*boaP}nKfWm97#&}RkUZQ6PE2_mAR5J<+QrSau< z*{CI(9N;_p>xlTsv17*&8i?r>5fOQyz;0&c1bXt`lODQ{6r}M>eBWX=7iIxk=6lP` zQNaJu0*4c3p{8~qtU4B)aSSULBh?s+=RFJz{9W>bI}YIg79<Nd!~CLUG+nxp`6ZS# zu+auEE;GTez|f$PE(biO86-!rV&g!~vu@9j!y0LG&(Y<<-&JEXgg**?v6D-~q}aN7 z;A#Q{pm4(Ao~p8k@O8Plth&3wSZpaFKGUeShl!~fzVewYYc#^TkSO&s`j_*FlO~zk zneGdY#{>o6+hi+sRD$At{W^J@m7q9&tNq4AZuVya8^4Z@4x1bRC63jAw4w&oza1S$ z#0o-QIEpn%tbEGyzzU<F?r$x@Y-bu<TVDbOzS`{YHNMQvz#t072uCb#J??(-Vl1%e zrca;Xk}BR2UU{G1@?bMsJn9hPYlGc*m<0N^RNaY%Bq%ed;g80UKA{}CD$EF54??Hs z8DwO2u69YRFboR}o_3~XC`ghZWeD}%AOwv_VC30D^1c-<C}ak2U0vgLMrhq@gZ;PS zd=kC|5r_ZAk6LVIlHfSr>C!c1cl9bVKZu9f8=8ZftZ-dkE~`HBuQ2+1NL?LWvfrLh zt=jkBpEnV+P4?^A=8^x79u4P0{m{@bt{_?Ed$~3gQA#{vW4;}jRs<Y1Zko&2#NKEX zG0C7&_T5?R{d0l1%cQja%fkJ03!z6T@A6kND#lw0GPp-nEo5OQ9KKCcya&p85suPV z>SkQerMn&X;R`|*g~rXHM+&Br(X5DNZF{M9($cB2((F}I<0yc;&F!?oV~g<)L||XT zgF&*$p58@;$*3r>gjC@r`eKKIWItv<dTq;3dEgq4e0&3~50WSQr|4|N>U;dW*hZS& zec%AEk|<k<T&!w>m25_vRYusIj!Um1FYq8Fp2ONDR0@&8S8m)0S1>b+$4X<9Tf25G 
z?Vl#92@fdCm63Ju=;oX{y=f}^5`sRq<ait&{t+Ei*d)3<hwjOh8*+xezkd@@|2xY} z#fVSjWn#<ua8f=J`-xQu)ea6uWGKo9S>c7v1>{dFutzF#;p#)G%t7UW<BmUwyfYSb zQE|vs=a=V-kH}pWiNpgIk_U1F^Ycf&`?A^~2{TF4!Z^mCO}`=%376XzaDs0xNn`Au z(izI~cOKC<a!)>;9q&p+#y^71YZ?*x;?DY&bz-L8v{_jq*U0}?lnCpAXFwvCWP2u7 z^=@UKe5~pl?c}TcK7qUex!=U>?Cg+Ca*ceKEo>NRtWcJXDKBprtT(p#vE;F{f2ewF z>JU?anXP=57B><oT*e+|hK3(Mge#7c$9Eh?sICrG%K4P=;B95)ZbO01+0|^_p%L$w zJc<i)bMeuT(vphKjdb~X$SXXGPj;NJ3G*weHF4LG4*S;og~gATIs`j8<<W1k2qrr^ zbk9VU!q3k(2YMl86QdhOkGz^JLtBQbQz)K%FhSp&t-X>pMX#3Kg8r_eB4^cmJv$zZ zZp&D#Ex$|m+s4w4fT|3_yjPdk@6Z21;`%Fz{r8FM|MiOdweJ3d%lmb~zvb2cyUY7^ z!M|VF>c72gcQUY*Bu}^&a&jHf1QVIwu8ZzOlrB%p*6b^}x*&JPYq9stI-8`qiQhL$ z)9RmeejsU~(A#cY)qDBfw>hqh^t5L8k3Ug_=baY}nX`VR;A^yPkdK-BAa~=9GKnpQ zEF<>wzSq0Qrc_vI9*Oj#BpVnQ;tDur^ug<kV>QW9ZBt0JEkCn2MR&)23&DTRN(t#t z0fx?CE$%qDegWqA+T;1<`%~+LjCn?yz#yCx3Xd(c5`=>o=+}M97Nr;;s=)f7U+d_4 z6gZ?6_=fEPT(x37$IUXe%`&LnBYw^c;<A+PQBfzRW@}=*ETft<@D~p!C*<}%88c^% zM%I4!;+hjtN{A6}^mBj6$E)sPW^VcO&vg}iRg?CaxRRh=!$I;0uZ{s#@O`GO*}ru> zC?_pqnNrA=D+}ytG7*sH8w{@=Ad-w;L6ih{oS(opdmYC{`99w(Vp^)`%%9bQPlAV+ zKRPcucfG<iVsi85)vJqMBvn2ks-*wKw0?8o|K6GZ?#6%r_kX8p{mrQUiaUS1$iGLK zU+?^WVK)9mtbrm4iF&Nsp}TyQj8wO{A2Jw(f%o5L#W-%ttD{qYV$hoTjN*MOjl`oF z0f#<_9%mGtdS$!TkcoY8Y9aH*8&$b-SaMAtVPM^t8L}&5)<1a9xXVB>9*!}?D|l=M zS%VrQ5w-Z+xl*5bU8mq*2k(=0=|0W8;M9!ui2dVLGlqBjx#R&W78*U(CA9s=m1}_7 z<-%mr3-nX_XH;ove)(kDvZP%G)7Dd?y6eis6!xt~A>~aWzU7VQVs0VnLMAJDz$?O- z-gh;GJS*mZZgMoiUL+X_<^u2rn&Hh;ye=G%71$5`DYkj;$*`9#&#!346oA}SWgAUa zU84^jY3=LBTVW!<59DPUB9|;VhLH3OtRy0@rY$!?&^jg{AhmzhI`~5$K12zqfGl*B zM2g+Vu73V}0(j|L$cd>7w}A)>*?e$m*q28SyKFQeZt+=-{0c*p&i%QA5f{iyl5)uD zaN=L?CMPo76kHIo`i_po*;&UwSb{79L?abZ%OvaO2D6Y8?&yMal*fDmQPx6Ptaz>9 z^=oBR$2VP8t-A)NlLwD1ysY;%>j=7-V7jLsq+Go%Q>URwr&n278W^_V46{0>CB!~t z&YLR82|Foi<gg|xo|br<_=ZD%KAGHJw*0Py+qh7q-><fYjBGjo&YjIOb8|%Qg%uoT zNKd3+uP<E3NkOGyzg2#v++8Y0WeMD56;IFNw{PFRS%3<j$o|yz^?9;8mON*Lrsw7o zfe)a_02qKQt?8aMO2PJy4hMA44{K-0qTV7h_ZP@IQH@tyW?IgSHX+v&aryoN_ybOs 
z0U;;cB67oo{Cpjt5FmJK7rJKBacc!G?Z%NuhljCuV;IOsDW~>5L-GTFV=E=)!9D)S z$^k&gA|F(<ut-45%vORJxDufd#dxBA2kqG)IXRgaHtb1(=sz&&oZ&oa)YQ^4_WTx; zs-$Fj#z)so;@YyZUd6`#0bC>Ra_^1csuxcb<Du43*U=$Deiw9!+EA{oRCRws!VU)T z2<Re~Ha1U>)iC%zCj`pq+((b+4lx0mFe_g~TYDP2xMB5N0#G$NGE!f>JaYuhK3Ll@ z<XeGA4hWzG+&}ZHS5*nP1H9H$?54qb?fFrZzu3$}4?BF^_4pnVh2-cJ#u#a^8uBb@ z8JUaeZG>rJPp4t9ADcm93Rhi6*ys<_`Gti{u>1;_=f{u$X=hnIYECscg5(Jm5Nf|s z2#eKpbRvjt2??O?`%kxq+3l1YX3%pt$KG2+9f7UnkY{sbcO)aEpads%<!c(aP5Qt9 zrok)XI(f3&J`>zqq&<*B*Mr@{CWqus#9{P-wvJBbh0--HVoaEymyqp1kwtLkh%rw# zDAvbBMPxC{LHX_qg76G7|7uh^z&e5q>A@;gLsE$w2(2>7fDRnib6bd&F!r87^f;hW zQ?f0X^ga6M9@PIS98aAkAej%uLOe+O3A>2B)HHBEXKiim3R(I&6_w}MHNtGpkY3s` z0<hJqgakRTuL8~)7#vA$Nvy9|*^4ctJeQ}U9H1yK1bY&-&>d*XjPhKOjvnOTkQw?A z8PY*&#>~gReJf4+T&RQ1Zi2P7wP`o4n@*2fQb2*>IuaW;4SC`iQ5r%fAXuYBk_m?O z0~2y+(Q_KP!}0VZ1YkP@Djl$STp>7$BKE_9RC`5UpiB$nr6wjV$3;(1&(hlZ94~z% zh)yX6i(hs%6onr;d6Gclki!OJ<YB&z4Gl5AzP?`{1YY2M3DUK!xVU(G&XpLC=@{k& zOfQ2eHZ_`_D5t(;Wu*Ywj0!*_D&`YO*a#6&&*NE^c+`+iT@GQulrkJi+QjMva|{dU zBY@RpFr_U~KymW$G!GBIgq4|fX$mtoOHzx!h+0`3!pGj7C~_Fnz|0P#+#UV)P1Do! zq^<1gf~y_-`c)mqNz}$D2KSyX(zdZlVRD-|jdB-e4~KU&pQ?b>!O022n=`=~SNfz; zmz#Po(&l#&s~UjEFb0uWDJ3OdSn4+K(K@CJ4GT-e{v^TuQ$O)A$?iA@3=c4yTyV=+ zXv<jB$B%*7{Xt-?=zVo{`mXgY%`bo=fLEdle}NR*MRE<{WNq#n(i4EeL0Z+bXZ=w} zrsPMz$A*5Ug^$$p#Eycp2fqNHmQq$$c0QX6f@j}8<%2Jj51O>|+Gcm)A~*sn?T#zx zo?rRgH~DUC#`I+9>kbmKcOprNZ%7|cMA*P&rJo=|!KAYZy)R{X&)Lm2j#tiZ1{z>t zIjX=C^zOb`_ua|KD`41F4^169a>VxS%SGB%c<#u^cgrI*G&FI^{^h;lS(HPKj6W_F zH(HKPDzNjD8t%5OS+KGhoLSAmYXTM6LlxNB+{Me>xZ<QkS2|9lP?ec7-zgHzFkmPu zR@s)OrFC@&5rZa*$3_Qe)Dj+G`;X*E^VFM7D|8=Imv?KfeC_a8xx6}c1w&VJ`)6F? 
zQve_1VPkPpO$fqA6~wEt4Fl!^ugnERNXP89xAslue3=xsCA;vnX9Jo@roESi)pd1~ zkRIygTt4mX?M)1Y;^yPn<^h0;fQZN|%&IwP8f`%{2&34fhl`%l^73V6+-O_wq$UH< zzMh_*jFv^)h&_`^NcLEFyf-<@A6J2WOaZOk79!(S;sst);()^aWn7#z@WQ=&_wJ#m z4^KM_@CneZ$I_U=m{9Rge@H;eEx5Abx!|y+5fnj%g}WTOr*fs7*B-3>NHG^xrT`gK zZ*8I0Hg9Vc{U7DE2`~j*0*3eYY?iO#Fpy$^LF@ciS7mrNcv;DCB7}#6++!XZ{5W{S zKUlWIo(XI@E6Z|ad1=cAZvxH*F*3euI1he~fN}tS5;zv{9eXGUEBdY=8#=3?Fbsmf zE5f$@{Io^#`E+ThSA;TMKQ#KkRO@vEYzt=jLLKiti~XLKf1U*Vmz;bSOtwbAVP7V5 z(1n>nUHTGH3Wag{=bkMkZo;Z?ANNJab0on)Zh|0b2I#O3lvf(Ce{7v`=vMi2I18nd zOX`u4k%+aQLX=J<Y-JEs=(tths|>6g)ShYh{$Yr%RD~He+}#Vsm;$gD(o3QYMo7|h zamjt^I(i5Z6KJ=lvhp)bH;R4<?7bM+WO)vIBV_mm<>lpl{S*M;3P}Lr;0QFC*Z`1- z+#Vi9#HCm4GI%#MWdY<6u5Q3oYt{A^H4tA3A}bV%B3JuwfA(B)C$*)^{Av?G`j`iH zUM4oi5XbDMf4F4;Fo)QXBv+Tm9b01HNKK;O3S#U<4#SJ!!mK-UR1j|)z_pKC|D7QL z7$xpe;&~wKk-%%uURwrRFhL_pD`@K!YMAbY&vUet8ytE{$O$s_Jh+O4j{%gAF&YL* z{W<Pz8j9EMiR!1T={oPqolmCNZ#Dw5(AXozi>eq`gtMcRl0fG`RI7(HDFIw)r3IGQ zOUc@q@`NbC0Xt+75xn$*GAb@1Ax%UiVyKB33afT#DAqytf?WomW19f9PORSE6@_W= z&jQO5A$8`eVRzCR#ob7uk)1Yztd)t334WCXnxa$ZmC6i;D`({8S3N&cRPEfW1NSOZ zmzvuFPg-~7oht_d!Q><oIsd__C<fURv{4o)%vzQffULj5?jW4oHcgnWGYtx<GC7JZ zU+yhW+&Zbem0$4VVK;ID)m>hh1=nsE`@szm>mC^-%PLOzAC%Motu;^4p$JyuwmWk} zEW<xS|8Y)7=gfXwP~gCfE1TT$M*z%@<E7Wnp%wCVz9V{R*CMXrHJRKWic%>O7jFCy DBXMbs literal 0 HcmV?d00001 diff --git a/notebooks/images/pandas_logo.svg b/notebooks/images/pandas_logo.svg new file mode 100644 index 0000000..a7af4e4 --- /dev/null +++ b/notebooks/images/pandas_logo.svg @@ -0,0 +1 @@ +<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 818.63 331.21"><defs><style>.cls-1{fill:#130754;}.cls-2{fill:#ffca00;}.cls-3{fill:#e70488;}</style></defs><title>Artboard 63</title><path class="cls-1" 
d="M290.85,199.21c-10.27,0-20.73-4.25-27.28-12.58v45H243l0-111.09h18.6l.71,12.22c6.38-9.39,17.71-14.35,28.52-14.35,20.73,0,36,17.37,36,40.4S311.58,199.22,290.85,199.21Zm-6.37-65.55c-12.05,0-21.79,9.39-21.79,25.16S272.43,184,284.48,184s21.79-9.39,21.79-25.16S296.53,133.66,284.48,133.66Z"/><path class="cls-1" d="M404.36,197.1l-.71-12.22c-6.38,9.39-17.72,14.35-28.53,14.34-20.73,0-36-17.36-36-40.39s15.24-40.4,36-40.39c10.81,0,22.15,5,28.53,14.35l.71-12.22H423V197.1Zm-22.85-63.43c-12.05,0-21.79,9.39-21.8,25.16S369.45,184,381.5,184s21.8-9.39,21.8-25.16S393.56,133.67,381.51,133.67Z"/><path class="cls-1" d="M494.87,197.11V154.77c0-14.88-5.13-19.84-14.52-19.84-9.75,0-20.38,8.85-20.38,19.48v42.7H439.41V120.57H458.2l.89,14.18c5.14-9.75,16.65-16.3,28.35-16.3,20.37,0,28,14.18,28,33.13v45.54Z"/><path class="cls-1" d="M590.77,197.13l-.71-12.23c-6.38,9.39-17.72,14.35-28.52,14.35-20.73,0-36-17.37-36-40.4s15.24-40.39,36-40.39c10.27,0,20.72,4.26,27.28,12.58V90.83h20.56l0,106.3ZM567.92,133.7c-12,0-21.79,9.39-21.79,25.15S555.87,184,567.92,184s21.79-9.38,21.79-25.15S580,133.7,567.92,133.7Z"/><path class="cls-1" d="M686.6,197.14l-.71-12.22c-6.38,9.39-17.72,14.34-28.53,14.34-20.73,0-36-17.36-36-40.4s15.24-40.39,36-40.39c10.81,0,22.15,5,28.53,14.36l.71-12.23h18.6v76.53Zm-22.85-63.43c-12,0-21.79,9.39-21.8,25.16S651.7,184,663.74,184s21.8-9.39,21.8-25.16S675.8,133.71,663.75,133.71Z"/><path class="cls-1" d="M750.73,199.63a60.16,60.16,0,0,1-30.65-8.69l3.37-14.17c6.2,3.72,15.59,8.51,26.93,8.51,8.15,0,13.82-2.48,13.82-8.86,0-5.49-5.85-7.44-16.3-9.92-18.78-4.08-25.51-14-25.51-24.81,0-12.05,9.39-23.38,30.12-23.38,12.58,0,23.57,5.49,26,6.91l-3.37,13.47A44.59,44.59,0,0,0,753,132.31c-8.32,0-12.4,2.83-12.4,7.44,0,5.13,5.32,7.44,13.46,9.39,20.2,4.25,28.35,13.64,28.35,23.92C782.45,189.53,770.4,199.63,750.73,199.63Z"/><rect class="cls-1" x="74.88" y="68.42" width="24.09" height="50.02"/><rect class="cls-1" x="74.88" y="171.17" width="24.09" height="50.02"/><rect class="cls-2" x="74.88" y="133.04" 
width="24.09" height="23.6"/><rect class="cls-1" x="36.19" y="109.55" width="24.09" height="166.27"/><rect class="cls-1" x="112.78" y="212.44" width="24.09" height="50.02"/><rect class="cls-1" x="112.78" y="109.61" width="24.09" height="50.02"/><rect class="cls-3" x="112.78" y="174.23" width="24.09" height="23.6"/><rect class="cls-1" x="150.67" y="55.39" width="24.09" height="166.27"/></svg> \ No newline at end of file diff --git a/notebooks/jupyter_cours.ipynb b/notebooks/jupyter_cours.ipynb index c5b9ef6..257fcdb 100644 --- a/notebooks/jupyter_cours.ipynb +++ b/notebooks/jupyter_cours.ipynb @@ -2,30 +2,34 @@ "cells": [ { "cell_type": "markdown", - "id": "danish-perfume", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "id": "suitable-agent", + "metadata": {}, "source": [ - "# Introduction to Notebooks and Jupyter\n", - "\n", - "Etienne Kornobis, Bertrand Néron, François Laurent\n", + "# <center>**Cours**</center>\n", "\n", - "2021/09/27\n", - "Institut Pasteur" + "<div style=\"text-align:center\">\n", + " <img src=\"images/jupyter.png\" width=\"600px\">\n", + " <div>\n", + " Bertrand Néron, François Laurent, Etienne Kornobis\n", + " <br />\n", + " <a src=\" https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistiqucs HUB</a>\n", + " <br />\n", + " © Institut Pasteur, 2021\n", + " </div> \n", + "</div>" ] }, { "cell_type": "markdown", - "id": "successful-hampshire", + "id": "aggressive-microwave", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ + "# Introduction to Notebooks and Jupyter\n", + "\n", "## The concept of literate programming\n", ">*Literate programming: Instead of imagining that our main task is to instruct a\n", ">computer what to do, let us concentrate rather on explaining to human beings\n", @@ -38,7 +42,7 @@ }, { "cell_type": "markdown", - "id": "valued-royal", + "id": "yellow-stick", "metadata": { "slideshow": { "slide_type": "slide" @@ -56,7 +60,7 @@ }, { 
"cell_type": "markdown", - "id": "relative-settlement", + "id": "vital-watershed", "metadata": { "slideshow": { "slide_type": "slide" @@ -79,7 +83,7 @@ }, { "cell_type": "markdown", - "id": "hungry-developer", + "id": "electoral-disability", "metadata": { "slideshow": { "slide_type": "slide" @@ -98,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "international-measurement", + "id": "corporate-composition", "metadata": { "slideshow": { "slide_type": "slide" @@ -115,7 +119,7 @@ }, { "cell_type": "markdown", - "id": "composite-insulin", + "id": "subsequent-proof", "metadata": { "slideshow": { "slide_type": "slide" @@ -138,7 +142,7 @@ }, { "cell_type": "markdown", - "id": "naughty-kazakhstan", + "id": "caroline-float", "metadata": { "slideshow": { "slide_type": "slide" @@ -159,7 +163,7 @@ }, { "cell_type": "markdown", - "id": "joined-landing", + "id": "funded-singer", "metadata": { "slideshow": { "slide_type": "slide" @@ -173,11 +177,15 @@ "\n", "- Using conda (recommended on jupyter website)\n", "\n", - "```conda env create -n jupyter jupyterlab```\n", + "```shell\n", + "conda env create -n jupyter jupyterlab\n", + "```\n", "\n", "- Using pip\n", "\n", - "```pip install jupyterlab```\n", + "```shell\n", + "pip install jupyterlab\n", + "```\n", "\n", "### On tars\n", "\n", @@ -186,7 +194,7 @@ }, { "cell_type": "markdown", - "id": "decreased-window", + "id": "forward-turkey", "metadata": { "slideshow": { "slide_type": "slide" @@ -212,7 +220,7 @@ }, { "cell_type": "markdown", - "id": "demonstrated-poultry", + "id": "egyptian-wings", "metadata": {}, "source": [ "## Overview" @@ -220,7 +228,7 @@ }, { "cell_type": "markdown", - "id": "lightweight-latex", + "id": "sacred-absolute", "metadata": {}, "source": [ "### Markdown \n", @@ -247,7 +255,7 @@ }, { "cell_type": "markdown", - "id": "important-launch", + "id": "automated-provision", "metadata": {}, "source": [ "### Code examples" @@ -255,7 +263,7 @@ }, { "cell_type": "markdown", - "id": "significant-prison", + "id": 
"dietary-manual", "metadata": {}, "source": [ "#### Python" @@ -264,7 +272,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "loose-shame", + "id": "dense-fairy", "metadata": {}, "outputs": [ { @@ -281,7 +289,7 @@ }, { "cell_type": "markdown", - "id": "chronic-lyric", + "id": "ordinary-reception", "metadata": {}, "source": [ "#### Bash" @@ -290,7 +298,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "studied-accent", + "id": "moved-drain", "metadata": {}, "outputs": [ { @@ -310,7 +318,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "political-machinery", + "id": "undefined-buying", "metadata": {}, "outputs": [ { @@ -327,7 +335,7 @@ }, { "cell_type": "markdown", - "id": "herbal-budget", + "id": "distinguished-drinking", "metadata": {}, "source": [ "#### Julia" @@ -336,7 +344,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "naval-tsunami", + "id": "immediate-appraisal", "metadata": {}, "outputs": [ { @@ -355,7 +363,7 @@ }, { "cell_type": "markdown", - "id": "offensive-separation", + "id": "senior-notice", "metadata": {}, "source": [ "#### NB\n", @@ -365,7 +373,7 @@ }, { "cell_type": "markdown", - "id": "alternate-housing", + "id": "general-lingerie", "metadata": {}, "source": [ "### Inline plotting" @@ -374,7 +382,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "educated-superintendent", + "id": "similar-color", "metadata": {}, "outputs": [], "source": [ @@ -386,7 +394,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "brilliant-resolution", + "id": "contrary-nicaragua", "metadata": {}, "outputs": [], "source": [ @@ -396,7 +404,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "cleared-federal", + "id": "civil-manner", "metadata": {}, "outputs": [ { @@ -420,7 +428,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "knowing-design", + "id": "noticed-details", "metadata": {}, "outputs": [ { @@ -452,7 +460,7 @@ }, { "cell_type": "markdown", - "id": "blessed-french", + "id": "seventh-desire", 
"metadata": {}, "source": [ "### Interactivity\n", @@ -467,7 +475,7 @@ }, { "cell_type": "markdown", - "id": "hungarian-version", + "id": "naval-novelty", "metadata": {}, "source": [ "#### Ipywidgets" @@ -476,7 +484,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "economic-stomach", + "id": "atomic-tongue", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +497,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "silver-christmas", + "id": "fallen-heavy", "metadata": {}, "outputs": [ { @@ -516,7 +524,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "independent-cancer", + "id": "comfortable-consistency", "metadata": { "tags": [] }, @@ -544,7 +552,7 @@ }, { "cell_type": "markdown", - "id": "engaged-congo", + "id": "extraordinary-intellectual", "metadata": {}, "source": [ "#### Plotly" @@ -553,7 +561,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "nominated-bench", + "id": "rural-maldives", "metadata": {}, "outputs": [], "source": [ @@ -563,7 +571,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "awful-delaware", + "id": "completed-modification", "metadata": {}, "outputs": [ { @@ -3053,14 +3061,14 @@ }, "xaxis": { "anchor": "y", - "autorange": true, + "autorange": false, "domain": [ 0, 1 ], "range": [ - 17.570230105465004, - 32.92976989453499 + 18.190799316093184, + 22.287828595853814 ], "showspikes": false, "title": { @@ -3070,14 +3078,14 @@ }, "yaxis": { "anchor": "x", - "autorange": true, + "autorange": false, "domain": [ 0, 1 ], "range": [ - -57.78545119705341, - 872.7854511970534 + 198.4717679243281, + 780.8290423258014 ], "showspikes": false, "title": { @@ -3087,7 +3095,7 @@ } } }, - "image/png": "", + "image/png": "", "text/html": [ "<div> <div id=\"74762f19-ee49-423b-ae1f-8d69b36caedd\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if 
(document.getElementById(\"74762f19-ee49-423b-ae1f-8d69b36caedd\")) { Plotly.newPlot( \"74762f19-ee49-423b-ae1f-8d69b36caedd\", [{\"hovertemplate\":\"x=%{x}<br>index=%{y}<extra></extra>\",\"legendgroup\":\"\",\"marker\":{\"color\":\"#636efa\",\"symbol\":\"circle\"},\"mode\":\"markers\",\"name\":\"\",\"orientation\":\"h\",\"showlegend\":false,\"type\":\"scatter\",\"x\":[20.13,21.33,22.18,18.68,29.01,27.51,22.59,21.6,22.26,23.04,20.06,19.31,24.97,18.82,28.66,27.45,19.57,21.15,23.03,21.55,19.84,23.41,25.21,25.21,28.19,21.9,20.01,23.23,22.86,27.28,21.71,21.93,23.91,22.49,20.57,24.23,20.95,24.6,21.79,19.22,19.38,23.06,21.5,18.59,19.46,21.82,20.56,21.78,19.95,24.38,18.67,21.71,26.02,24.74,20.43,19.1,31.24,27.41,22.22,19.59,20.9,29.0,20.9,20.32,31.83,19.88,24.87,28.73,22.34,25.43,23.5,26.75,23.25,19.92,21.95,23.04,23.05,22.49,31.14,21.63,21.55,21.95,21.36,18.92,24.75,22.83,20.45,22.22,20.3,22.28,20.01,30.63,24.3,19.26,24.89,24.41,23.16,22.53,19.95,21.32,22.72,21.29,23.87,22.47,27.71,29.66,21.27,21.29,23.66,21.16,21.22,20.6,22.85,23.25,31.91,18.72,22.43,20.98,21.19,21.61,20.65,22.71,20.32,25.39,19.05,19.83,21.09,23.14,24.38,31.23,23.04,25.21,24.43,27.47,19.59,27.28,25.72,19.22,22.75,22.98,20.51,27.39,19.57,24.85,23.18,20.43,29.31,29.41,20.04,28.8,26.02,21.1,22.06,28.98,29.96,24.0,19.81,19.81,22.58,20.43,20.2,21.22,24.52,20.83,27.4,20.57,30.11,27.7,21.56,31.58,24.53,18.73,25.01,25.06,24.15,22.83,23.05,27.66,22.46,21.1,23.38,21.54,20.86,23.51,22.13,25.14,24.44,25.39,22.18,27.68,22.72,31.83,20.95,28.19,22.26,24.91,23.87,25.34,19.57,22.76,22.77,24.87,20.93,25.53,20.38,26.7,19.26,24.65,31.96,20.95,20.61,25.73,24.3,20.14,27.11,27.75,22.22,22.53,27.73,19.07,25.81,18.62,23.93,24.96,28.34,25.74,25.64,22.26,25.01,21.22,31.05,24.93,21.3,26.09,24.46,22.95,23.58,25.51,20.66,30.61,25.05,22.72,23.45,22.47,25.35,23.18,24.3,25.9,22.78,25.97,22.44,21.53,25.14,21.91,22.14,31.69,22.64,23.56,22.98,21.26,21.38,28.4,28.71,25.79,22.59,29.4,21.97,23.56,25.18,21.29,22.46,24.16,28.08,23.3,27.28,23.99
,22.1,20.42,26.83,19.92,19.91,18.67,21.07,21.48,28.98,18.92,21.9,31.55,20.07,24.24,21.33,20.68,25.46,25.39,21.13,20.06,20.93,24.09,23.59,19.13,27.43,21.56,20.7,23.22,23.73,20.13,23.22,25.96,29.41,20.95,23.53,19.97,30.12,19.06,20.17,24.74,22.95,25.07,26.77,21.41,23.63,22.76,25.55,23.3,22.86,19.33,23.53,27.23,21.15,26.72,26.12,25.65,20.69,25.71,20.19,21.77,24.24,23.62,23.12,22.2,27.77,21.96,19.03,19.47,22.32,30.86,27.47,27.88,20.93,21.33,21.21,20.43,25.35,31.86,22.83,20.2,20.44,24.69,21.48,20.02,20.93,21.25,22.86,25.28,23.18,25.94,23.24,21.06,30.36,24.67,25.71,28.73,27.77,26.47,23.4,28.69,25.3,21.29,23.24,26.19,25.72,22.89,24.53,21.79,27.13,21.87,24.09,22.98,21.3,25.28,26.17,22.53,26.7,24.89,25.5,18.64,23.83,21.63,20.51,24.11,23.09,24.44,26.95,30.05,25.28,24.75,23.24,24.11,28.4,27.75,24.91,21.5,31.48,26.88,22.98,26.19,25.97,26.44,23.22,24.16,25.0,23.76,23.59,22.79,26.77,22.99,26.06,22.54,23.45,21.84,30.48,22.75,23.22,23.24,24.15,22.14,29.58,30.79,20.97,28.98,20.47,29.72,24.28,27.96,26.58,19.36,27.33,23.87,30.81,26.17,27.63,20.71,19.6,25.09,30.46,18.5,25.47,20.39,22.54,23.03,23.91,19.59,19.06,19.46,31.61,20.93,23.91,26.76,30.09,20.66,21.87,24.88,26.37,27.73,19.77,20.7,31.64,26.12,24.6,22.49,24.38,22.94,25.39,21.82,21.56,25.47,24.09,22.13,19.77,20.44,23.63,20.79,23.71,22.1,21.09,20.31,30.44,28.47,28.54,27.4,29.68,21.25,24.16,21.9,27.55,18.9,24.65,24.74,23.14,24.84,26.98,23.42,26.02,26.49,24.0,22.14,22.83,20.83,24.65,20.02,20.96,20.79,26.17,20.83,24.69,20.68,22.06,25.0,23.19,22.37,23.23,21.88,29.24,21.79,24.02,30.48,23.18,30.97,25.55,21.56,23.19,29.6,26.02,25.72,23.73,22.32,29.76,31.4,24.16,25.99,31.95,26.77,25.4,19.75,22.32,23.56,26.21,24.39,27.94,28.68,23.12,27.15,26.21,24.81,31.19,25.28,28.22,19.65,22.53,20.86,23.45,25.09,21.82,28.72,28.68,22.2,22.83,24.87,25.35,24.89,24.15,23.4,25.68,23.8,25.14,21.77,20.59,25.01,30.12,19.71,26.42,22.22,27.39,26.02,23.05,24.25,21.97,25.15,26.67,22.3,24.11,28.39,23.98,23.99,26.86,28.08,30.86,22.83,27.15,25.76,29.66,26.81,22.09,23.95,24
.3,27.45,24.48,28.73,25.25,25.18,24.74,28.57,30.4,26.88,24.5,27.1,30.64,28.66,26.87,27.08,25.39,19.94,21.21,28.62,30.99,20.56,23.01,31.64,25.39,28.25,29.68,21.21,21.33,22.36,24.65,22.1,19.04,22.86,23.79,21.63,20.06,23.24,24.14,23.24,18.59,24.74,20.17,21.61,27.29,22.2,24.14,24.38,30.02,30.73,23.23,23.66,21.63,22.86,23.83,30.75,25.47,21.56,19.11,27.99,21.51,29.17,28.08,28.04,23.95,26.44,29.55,24.91,22.31,27.33,19.57,20.32,31.16,23.24,29.47,21.6,24.16,21.09,29.33,29.27,22.67,24.16,25.72,23.5,26.29,28.04,27.53,28.51,25.1,23.3,22.94,23.83,21.71,24.14,26.98,24.77,23.3,22.06,21.64,25.0,26.5,21.5,25.97,31.67,27.33,21.79,23.92,26.36,24.96,21.15,25.18,32.0,28.08,27.06,22.06,28.99,24.28,26.75,24.15,23.73,27.33,26.21,29.76,22.22,25.3,25.05,21.63,26.6,23.03,30.4,23.04,25.61,23.35,22.49,26.07,26.17,21.77,21.46,22.4,25.1,20.28,22.2,29.98,26.86,26.47,24.67,27.17,25.3,31.57,27.21,25.34,26.72,25.59,22.2,25.25,24.22,26.36,28.4,23.51,26.56,28.08,27.33,26.67,25.45,28.05,27.08,27.68,28.08,22.65,22.27,26.49,24.02,31.97,29.33,24.03,23.73,31.8,26.06,27.35,24.53,22.79,28.51,19.59,28.72,21.14,23.14,25.05,26.17,24.5,28.25,26.12,22.86,25.95,22.57,26.39,18.51,24.69,24.75,23.53,20.07,22.77,27.39],\"xaxis\":\"x\",\"y\":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,22
5,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,72
5,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815],\"yaxis\":\"y\"}], {\"legend\":{\"tracegroupgap\":0},\"margin\":{\"t\":60},\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth\"}],\"contour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"contour\"}],\"contourcarpet\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"contourcarpet\"}],\"heatmap\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmap\"}],\"heatmapgl\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmapgl\"}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"histogram2d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2d\"}],\"histogram2dcontour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2dcontour\"}],\"mesh3d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"mesh3d\"}],\"parcoords\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"parcoords\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}],\"scatter\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatter\"}],\"scatter3d\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidt
h\":0,\"ticks\":\"\"}},\"type\":\"scatter3d\"}],\"scattercarpet\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattercarpet\"}],\"scattergeo\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergeo\"}],\"scattergl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergl\"}],\"scattermapbox\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattermapbox\"}],\"scatterpolar\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolar\"}],\"scatterpolargl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolargl\"}],\"scatterternary\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterternary\"}],\"surface\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"surface\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}]},\"layout\":{\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"autotypenumbers\":\"strict\",\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]],\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.555555555
5555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"geo\":{\"bgcolor\":\"white\",\"lakecolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"showlakes\":true,\"showland\":true,\"subunitcolor\":\"white\"},\"hoverlabel\":{\"align\":\"left\"},\"hovermode\":\"closest\",\"mapbox\":{\"style\":\"light\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"bgcolor\":\"#E5ECF6\",\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"ternary\":{\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"bgcolor\":\"#E5ECF6\",\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"title\":{\"x\":0.05}
,\"xaxis\":{\"automargin\":true,\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"zerolinewidth\":2},\"yaxis\":{\"automargin\":true,\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"zerolinewidth\":2}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"x\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"index\"}}}, {\"responsive\": true} ).then(function(){\n", " \n", @@ -3126,7 +3134,7 @@ }, { "cell_type": "markdown", - "id": "equal-water", + "id": "closing-affect", "metadata": { "slideshow": { "slide_type": "slide" @@ -3137,8 +3145,17 @@ "\n", "- Virtual environment support: venv and conda environment\n", "- Presentations (Reveal.js and Rise)\n", + "- Dashboarding with voila or others\n", "- ..." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sustained-radius", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/jupyter_practice.ipynb b/notebooks/jupyter_practice.ipynb index e2421e6..ce6afd5 100644 --- a/notebooks/jupyter_practice.ipynb +++ b/notebooks/jupyter_practice.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "expired-highway", + "id": "cultural-palestine", "metadata": {}, "source": [ "# Introduction to JupyterLab\n", @@ -16,28 +16,38 @@ "\n", "After creating a folder for the course, use `venv` to create a virtual environment named for example `sp_env`:\n", "\n", - "```python3 -m venv sp_env```\n", + "```shell\n", + "python3 -m venv sp_env\n", + "```\n", "\n", "This will create a folder `sp_env` in your working directory. The corresponding virtual environment can be activated with:\n", "\n", - "```source sp_env/bin/activate```\n", + "```shell\n", + "source sp_env/bin/activate\n", + "```\n", "\n", "You are now in a virtual environment. 
You can install librairies in it using pip and these will be installed specifically in this environment (and not globally on your machine). For more on virtual environment, [see the documentation](https://docs.python.org/3/library/venv.html).\n", "\n", "Once the virtal environment activated, we can start composing this environment, now with jupyterlab\n", "\n", - "```pip install jupyterlab```\n", + "```shell\n", + "pip install jupyterlab\n", + "```\n", "\n", "You can now start the jupyter server as follows:\n", "\n", - "```jupyter lab```\n", + "```shell\n", + "jupyter lab\n", + "```\n", "\n", "And open the specified URL in your internet browser (Chrome or Firefox are\n", "better supported). By default, the address will be http://localhost:8888 and you will be automatically redirected to this tab.\n", "\n", "Once all you work is done, you can exit the virtual environment with:\n", "\n", - "```deactivate```\n", + "```shell\n", + "deactivate\n", + "```\n", "\n", "You will need to reactivate it (with `source sp_env/bin/activate`) in order to use it again.\n", "\n", @@ -105,7 +115,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "billion-actress", + "id": "public-nightlife", "metadata": {}, "outputs": [ { @@ -130,7 +140,7 @@ }, { "cell_type": "markdown", - "id": "tight-spring", + "id": "marine-arctic", "metadata": {}, "source": [ "- The exclamation mark character ``!`` can be used as well to execute the following line in a bash subprocess. 
For example:" @@ -139,7 +149,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "helpful-oasis", + "id": "considerable-fleet", "metadata": {}, "outputs": [ { @@ -156,7 +166,7 @@ }, { "cell_type": "markdown", - "id": "marked-construction", + "id": "satellite-disposal", "metadata": {}, "source": [ "- `%timeit` can be used to check for execution times:" @@ -165,7 +175,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "photographic-premises", + "id": "delayed-thunder", "metadata": {}, "outputs": [ { @@ -182,7 +192,7 @@ }, { "cell_type": "markdown", - "id": "sustained-render", + "id": "vocational-jacksonville", "metadata": {}, "source": [ "- Load more extension for the notebook, for example `autoreload` is useful extension to automatically reload a module imported in a Jupyter notebook if the module has changed locally:" @@ -191,7 +201,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "posted-pleasure", + "id": "physical-steering", "metadata": {}, "outputs": [ { @@ -210,7 +220,7 @@ }, { "cell_type": "markdown", - "id": "insured-entertainment", + "id": "regular-tiger", "metadata": {}, "source": [ "# Exercices" @@ -218,7 +228,7 @@ }, { "cell_type": "markdown", - "id": "helpful-telephone", + "id": "rotary-bouquet", "metadata": {}, "source": [ "The aim here is to get comfortable in Jupyterlab.\n", @@ -235,14 +245,14 @@ { "cell_type": "code", "execution_count": null, - "id": "congressional-light", + "id": "chinese-values", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "little-questionnaire", + "id": "useful-segment", "metadata": {}, "source": [ "## Exercise\n", @@ -258,14 +268,14 @@ { "cell_type": "code", "execution_count": null, - "id": "behavioral-ethnic", + "id": "classical-extraction", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "trained-advantage", + "id": "iraqi-wholesale", "metadata": {}, "source": [ "## Exercise\n", @@ -283,14 +293,14 @@ { "cell_type": "code", 
"execution_count": null, - "id": "phantom-register", + "id": "refined-relation", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "precise-average", + "id": "manufactured-treatment", "metadata": {}, "source": [ "## Exercise\n", @@ -307,14 +317,14 @@ { "cell_type": "code", "execution_count": null, - "id": "solar-auckland", + "id": "featured-converter", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "inside-approval", + "id": "constant-thriller", "metadata": {}, "source": [ "## Exercise\n", @@ -325,14 +335,14 @@ { "cell_type": "code", "execution_count": null, - "id": "worse-husband", + "id": "illegal-preserve", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "dirty-speaker", + "id": "written-bidding", "metadata": {}, "source": [ "## Exercise\n", @@ -343,14 +353,14 @@ { "cell_type": "code", "execution_count": null, - "id": "injured-thirty", + "id": "waiting-concord", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "perceived-michael", + "id": "varying-providence", "metadata": {}, "source": [ "# More documentation\n", diff --git a/notebooks/pandas_TP.ipynb b/notebooks/pandas_TP.ipynb new file mode 100644 index 0000000..ea7191c --- /dev/null +++ b/notebooks/pandas_TP.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "separated-samba", + "metadata": {}, + "source": [ + "# <center>**TP**</center>\n", + "\n", + "<img src=\"./images/pandas_logo.svg\">\n", + "<div style=\"text-align:center\">\n", + " Bertrand Néron, François Laurent, Etienne Kornobis\n", + " <br />\n", + " <a href=\"https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistics HUB</a>\n", + " <br />\n", + " © Institut Pasteur, 2021\n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "id": "hazardous-berry", + "metadata": {}, + "source": [ + "# Exploring Blast results" + ] + }, + { + 
"cell_type": "markdown", + "id": "union-charleston", + "metadata": {}, + "source": [ + "- Import the file data/blast.txt into a pandas dataframe variable (named `blast_res`). Verify that its type is a pandas\n", + "dataframe and display the dataframe in jupyterlab.\n", + "\n", + "NB: The column names for this blast format are: \"qseqid\", \"sseqid\", \"pident\", \"length\", \"mismatch\", \"gapopen\", \"qstart\", \"qend\", \"sstart\", \"send\", \"evalue\", \"bitscore\".\n", + "You are going to need to pass an extra argument (`names`) to specify the names of the columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "assured-telescope", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "parallel-algorithm", + "metadata": {}, + "source": [ + "Explore ``blast_res`` dataframe:\n", + "\n", + "- Display the first 5 lines of the dataframe.\n", + "- Display the last 8 lines of the dataframe.\n", + "- Display an overall statistical description of the dataframe.\n", + "- Display the dimensions of the dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "nuclear-carrier", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "still-scheme", + "metadata": {}, + "source": [ + "- Extract the 3rd line from the ``blast_res`` dataframe. Which type of data structure is returned by this extraction?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "northern-worse", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "persistent-beijing", + "metadata": {}, + "source": [ + "- Extract the *sseqid* column from the ``blast_res`` dataframe. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "directed-brazilian", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "generic-hearts", + "metadata": {}, + "source": [ + "- Get the minimum and maximum values of the *evalue* column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "prescription-appraisal", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "naughty-brook", + "metadata": {}, + "source": [ + "- Get the median and the mean of the *bitscore* column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "extended-chicken", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "contrary-allah", + "metadata": {}, + "source": [ + "- Keep only the hits with a percentage of identity (*pident*) greater than 75%." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "liked-shell", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "neutral-experience", + "metadata": {}, + "source": [ + "- Based on the bitscore alone, extract only the best hit(s) (i.e. the hit(s) with the highest bitscore)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "three-period", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "blank-digest", + "metadata": {}, + "source": [ + "- Keep only the hits that correspond to human entries in the database (*sseqid*)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "empirical-manhattan", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "graphic-corruption", + "metadata": {}, + "source": [ + "- Keep only the hits with a percentage of identity greater than 75% **AND** that are NOT HUMAN hits (sseqid does not contain \"HUMAN\"). 
(Hint: To negate a boolean in a query you can use \"~\" in front of it)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "numerical-spread", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "boolean-verse", + "metadata": {}, + "source": [ + "- Plot a histogram of the bitscores. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "genetic-navigation", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "destroyed-velvet", + "metadata": {}, + "source": [ + "- Plot a barplot of the number of hits per species (the species is taken to be the last code after the \"_\" in the sseqid column)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "composite-twelve", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "typical-japan", + "metadata": {}, + "source": [ + "# Extra exercise" + ] + }, + { + "cell_type": "markdown", + "id": "electronic-ferry", + "metadata": {}, + "source": [ + "- Read the file 'data/city_temperature.csv'\n", + "\n", + "- Force the City datatype to string by passing `dtype={'City': str}` as argument to the function to read the file.\n", + "Don't worry about the warning: it is due to the State column, which contains NaN for non-US countries, but we do not use these data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "verified-acceptance", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "extreme-radio", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "phantom-inclusion", + "metadata": {}, + "source": [ + "We will work only on the Europe Region, so create a dataframe named europe with only these data.\n", + "Let's explore it a little bit\n", + "* how many data?\n", + "* which columns? \n", + "* index? 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "saving-labor", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pleased-collaboration", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tracked-addition", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "manufactured-hierarchy", + "metadata": {}, + "source": [ + "- which countries are in Europe ?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sharing-lawsuit", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "adopted-eligibility", + "metadata": {}, + "source": [ + "- Remove the columns 'Region' and 'State' from the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "korean-nudist", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "creative-apparatus", + "metadata": {}, + "source": [ + "- From the Europe dataframe create a new dataset containing countries: 'France', 'Spain', 'Italy'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "solar-nursery", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "written-spank", + "metadata": {}, + "source": [ + "- Group the data on 'City' and 'Year' compute the mean of each group and keep only the 'AvgTemperature' column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "effective-declaration", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "stretch-diesel", + "metadata": {}, + "source": [ + "- Do the same but compute the standard deviation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "occupational-script", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "liked-mobility", + "metadata": {}, + "source": [ + "* reset the index for the mean data and std data\n", + "* rename the column AvgTemperature to Tmp on the mean data\n", + "* rename the column AvgTemperature to std on the std data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fewer-banner", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "continuing-noise", + "metadata": {}, + "source": [ + "- merge the two tables data_mean and data_std" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "noticed-southwest", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "strong-employee", + "metadata": {}, + "source": [ + "- save the data in a file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "involved-weekly", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "young-camping", + "metadata": {}, + "source": [ + "# Teasing\n", + "\n", + "a quick data plotting. 
We will improve it in the matplotlib course." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "japanese-pierre", + "metadata": {}, + "outputs": [], + "source": [ + "for city, df in clean_data.groupby('City'):\n", + " df.plot('Year', 'Tmp', label=city)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tough-trash", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/pandas_TP_solution.ipynb b/notebooks/pandas_TP_solution.ipynb new file mode 100644 index 0000000..ee623af --- /dev/null +++ b/notebooks/pandas_TP_solution.ipynb @@ -0,0 +1,2933 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "right-artwork", + "metadata": {}, + "source": [ + "# <center>**TP**</center>\n", + "\n", + "<img src=\"./images/pandas_logo.svg\">\n", + "<div style=\"text-align:center\">\n", + " Bertrand Néron\n", + " <br />\n", + " <a href=\"https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistics HUB</a>\n", + " <br />\n", + " © Institut Pasteur, 2021\n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "id": "sacred-breathing", + "metadata": {}, + "source": [ + "# Exploring Blast results" + ] + }, + { + "cell_type": "markdown", + "id": "technical-crystal", + "metadata": {}, + "source": [ + "- Import the file data/blast.txt into a pandas dataframe variable (named `blast_res`). 
Verify that its type is a pandas\n", + "dataframe and display the dataframe in jupyterlab.\n", + "\n", + "NB: The column names for this blast format are: \"qseqid\", \"sseqid\", \"pident\", \"length\", \"mismatch\", \"gapopen\", \"qstart\", \"qend\", \"sstart\", \"send\", \"evalue\", \"bitscore\".\n", + "You are going to need to pass an extra argument (`names`) to specify the names of the columns." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "recreational-seller", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "major-dream", + "metadata": {}, + "outputs": [], + "source": [ + "blast_colnames = [\"qseqid\",\"sseqid\",\"pident\",\"length\",\"mismatch\",\"gapopen\",\"qstart\",\"qend\",\"sstart\",\"send\",\"evalue\",\"bitscore\"]\n", + "blast_res = pd.read_csv(\"../data/blast.txt\", sep=\"\\t\", names=blast_colnames)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "parliamentary-heaven", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(blast_res)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "changing-drive", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + 
" <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.000000e+00</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O08782|ALD2_CRIGR</td>\n", + " <td>83.23</td>\n", + " <td>316</td>\n", + " <td>53</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>537.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P45377|ALD2_MOUSE</td>\n", + " <td>82.28</td>\n", + " <td>316</td>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>527.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P21300|ALD1_MOUSE</td>\n", + " <td>79.75</td>\n", + " <td>316</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " 
<td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P80874|GS69_BACSU</td>\n", + " <td>29.36</td>\n", + " <td>218</td>\n", + " <td>107</td>\n", + " <td>9</td>\n", + " <td>16</td>\n", + " <td>213</td>\n", + " <td>16</td>\n", + " <td>206</td>\n", + " <td>3.000000e-11</td>\n", + " <td>67.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>172</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q56Y42|PLR1_ARATH</td>\n", + " <td>23.00</td>\n", + " <td>313</td>\n", + " <td>178</td>\n", + " <td>10</td>\n", + " <td>16</td>\n", + " <td>285</td>\n", + " <td>50</td>\n", + " <td>342</td>\n", + " <td>6.000000e-09</td>\n", + " <td>60.1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P25906|YDBC_ECOLI</td>\n", + " <td>23.75</td>\n", + " <td>299</td>\n", + " <td>181</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " <td>294</td>\n", + " <td>19</td>\n", + " <td>285</td>\n", + " <td>6.000000e-09</td>\n", + " <td>59.7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>174</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C6TBN2|AKR1_SOYBN</td>\n", + " <td>25.32</td>\n", + " <td>316</td>\n", + " <td>178</td>\n", + " <td>13</td>\n", + " <td>9</td>\n", + " <td>290</td>\n", + " <td>19</td>\n", + " <td>310</td>\n", + " <td>6.000000e-08</td>\n", + " <td>57.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>175</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P49261|CROB_LEPLU</td>\n", + " <td>45.90</td>\n", + " <td>61</td>\n", + " <td>20</td>\n", + " <td>1</td>\n", + " <td>95</td>\n", + " <td>155</td>\n", + " <td>15</td>\n", + " <td>62</td>\n", + " <td>1.000000e-06</td>\n", + " <td>50.1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>176 rows × 12 columns</p>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + 
"1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "2 AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 \n", + "3 AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 \n", + "4 AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 \n", + ".. ... ... ... ... ... ... \n", + "171 AK1BA_HUMAN sp|P80874|GS69_BACSU 29.36 218 107 9 \n", + "172 AK1BA_HUMAN sp|Q56Y42|PLR1_ARATH 23.00 313 178 10 \n", + "173 AK1BA_HUMAN sp|P25906|YDBC_ECOLI 23.75 299 181 11 \n", + "174 AK1BA_HUMAN sp|C6TBN2|AKR1_SOYBN 25.32 316 178 13 \n", + "175 AK1BA_HUMAN sp|P49261|CROB_LEPLU 45.90 61 20 1 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.000000e+00 654.0 \n", + "1 23 316 51 344 0.000000e+00 559.0 \n", + "2 1 316 1 316 0.000000e+00 537.0 \n", + "3 1 316 1 316 0.000000e+00 527.0 \n", + "4 1 316 1 316 0.000000e+00 515.0 \n", + ".. ... ... ... ... ... ... \n", + "171 16 213 16 206 3.000000e-11 67.0 \n", + "172 16 285 50 342 6.000000e-09 60.1 \n", + "173 11 294 19 285 6.000000e-09 59.7 \n", + "174 9 290 19 310 6.000000e-08 57.0 \n", + "175 95 155 15 62 1.000000e-06 50.1 \n", + "\n", + "[176 rows x 12 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res" + ] + }, + { + "cell_type": "markdown", + "id": "productive-chorus", + "metadata": {}, + "source": [ + "Explore the ``blast_res`` dataframe:\n", + "\n", + "- Display the first 5 lines of the dataframe.\n", + "- Display the last 8 lines of the dataframe.\n", + "- Display an overall statistical description of the dataframe.\n", + "- Display the dimensions of the dataframe."
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "yellow-matthew", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.0</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.0</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O08782|ALD2_CRIGR</td>\n", + " <td>83.23</td>\n", + " <td>316</td>\n", + " <td>53</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.0</td>\n", + " <td>537.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P45377|ALD2_MOUSE</td>\n", + " <td>82.28</td>\n", + " 
<td>316</td>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.0</td>\n", + " <td>527.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P21300|ALD1_MOUSE</td>\n", + " <td>79.75</td>\n", + " <td>316</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.0</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + "1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "2 AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 \n", + "3 AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 \n", + "4 AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.0 654.0 \n", + "1 23 316 51 344 0.0 559.0 \n", + "2 1 316 1 316 0.0 537.0 \n", + "3 1 316 1 316 0.0 527.0 \n", + "4 1 316 1 316 0.0 515.0 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "handled-details", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " 
<th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>168</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q94A68|Y1669_ARATH</td>\n", + " <td>24.08</td>\n", + " <td>299</td>\n", + " <td>176</td>\n", + " <td>9</td>\n", + " <td>25</td>\n", + " <td>292</td>\n", + " <td>84</td>\n", + " <td>362</td>\n", + " <td>7.000000e-15</td>\n", + " <td>77.8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>169</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P82810|MORA_RABIT</td>\n", + " <td>31.18</td>\n", + " <td>170</td>\n", + " <td>45</td>\n", + " <td>5</td>\n", + " <td>117</td>\n", + " <td>286</td>\n", + " <td>27</td>\n", + " <td>124</td>\n", + " <td>9.000000e-13</td>\n", + " <td>68.2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>170</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P46336|IOLS_BACSU</td>\n", + " <td>25.42</td>\n", + " <td>295</td>\n", + " <td>159</td>\n", + " <td>10</td>\n", + " <td>29</td>\n", + " <td>289</td>\n", + " <td>38</td>\n", + " <td>305</td>\n", + " <td>3.000000e-12</td>\n", + " <td>69.7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P80874|GS69_BACSU</td>\n", + " <td>29.36</td>\n", + " <td>218</td>\n", + " <td>107</td>\n", + " <td>9</td>\n", + " <td>16</td>\n", + " <td>213</td>\n", + " <td>16</td>\n", + " <td>206</td>\n", + " <td>3.000000e-11</td>\n", + " <td>67.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>172</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q56Y42|PLR1_ARATH</td>\n", + " <td>23.00</td>\n", + " <td>313</td>\n", + " <td>178</td>\n", + " <td>10</td>\n", + " <td>16</td>\n", + " <td>285</td>\n", + " <td>50</td>\n", + " <td>342</td>\n", + " <td>6.000000e-09</td>\n", + " <td>60.1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P25906|YDBC_ECOLI</td>\n", + " <td>23.75</td>\n", + " <td>299</td>\n", + 
" <td>181</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " <td>294</td>\n", + " <td>19</td>\n", + " <td>285</td>\n", + " <td>6.000000e-09</td>\n", + " <td>59.7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>174</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C6TBN2|AKR1_SOYBN</td>\n", + " <td>25.32</td>\n", + " <td>316</td>\n", + " <td>178</td>\n", + " <td>13</td>\n", + " <td>9</td>\n", + " <td>290</td>\n", + " <td>19</td>\n", + " <td>310</td>\n", + " <td>6.000000e-08</td>\n", + " <td>57.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>175</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P49261|CROB_LEPLU</td>\n", + " <td>45.90</td>\n", + " <td>61</td>\n", + " <td>20</td>\n", + " <td>1</td>\n", + " <td>95</td>\n", + " <td>155</td>\n", + " <td>15</td>\n", + " <td>62</td>\n", + " <td>1.000000e-06</td>\n", + " <td>50.1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "168 AK1BA_HUMAN sp|Q94A68|Y1669_ARATH 24.08 299 176 9 \n", + "169 AK1BA_HUMAN sp|P82810|MORA_RABIT 31.18 170 45 5 \n", + "170 AK1BA_HUMAN sp|P46336|IOLS_BACSU 25.42 295 159 10 \n", + "171 AK1BA_HUMAN sp|P80874|GS69_BACSU 29.36 218 107 9 \n", + "172 AK1BA_HUMAN sp|Q56Y42|PLR1_ARATH 23.00 313 178 10 \n", + "173 AK1BA_HUMAN sp|P25906|YDBC_ECOLI 23.75 299 181 11 \n", + "174 AK1BA_HUMAN sp|C6TBN2|AKR1_SOYBN 25.32 316 178 13 \n", + "175 AK1BA_HUMAN sp|P49261|CROB_LEPLU 45.90 61 20 1 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "168 25 292 84 362 7.000000e-15 77.8 \n", + "169 117 286 27 124 9.000000e-13 68.2 \n", + "170 29 289 38 305 3.000000e-12 69.7 \n", + "171 16 213 16 206 3.000000e-11 67.0 \n", + "172 16 285 50 342 6.000000e-09 60.1 \n", + "173 11 294 19 285 6.000000e-09 59.7 \n", + "174 9 290 19 310 6.000000e-08 57.0 \n", + "175 95 155 15 62 1.000000e-06 50.1 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.tail(8)" + 
] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "virgin-forestry", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>176.000000</td>\n", + " <td>1.760000e+02</td>\n", + " <td>176.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>42.772500</td>\n", + " <td>300.250000</td>\n", + " <td>149.465909</td>\n", + " <td>4.767045</td>\n", + " <td>6.948864</td>\n", + " <td>299.079545</td>\n", + " <td>9.198864</td>\n", + " <td>293.971591</td>\n", + " <td>6.091102e-09</td>\n", + " <td>231.952841</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>12.397842</td>\n", + " <td>29.162108</td>\n", + " <td>30.489511</td>\n", + " <td>2.659161</td>\n", + " <td>11.909789</td>\n", + " <td>22.140446</td>\n", + " <td>9.583331</td>\n", + " <td>35.601834</td>\n", + " <td>7.548480e-08</td>\n", + " <td>104.060644</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>23.000000</td>\n", + " <td>61.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>1.000000</td>\n", 
+ " <td>118.000000</td>\n", + " <td>1.000000</td>\n", + " <td>62.000000</td>\n", + " <td>0.000000e+00</td>\n", + " <td>50.100000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>34.845000</td>\n", + " <td>294.000000</td>\n", + " <td>150.000000</td>\n", + " <td>2.000000</td>\n", + " <td>3.000000</td>\n", + " <td>294.000000</td>\n", + " <td>4.000000</td>\n", + " <td>272.000000</td>\n", + " <td>8.750000e-100</td>\n", + " <td>167.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>40.230000</td>\n", + " <td>303.000000</td>\n", + " <td>159.000000</td>\n", + " <td>5.000000</td>\n", + " <td>5.000000</td>\n", + " <td>297.000000</td>\n", + " <td>8.000000</td>\n", + " <td>302.500000</td>\n", + " <td>1.000000e-61</td>\n", + " <td>205.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>47.980000</td>\n", + " <td>316.000000</td>\n", + " <td>164.000000</td>\n", + " <td>6.000000</td>\n", + " <td>5.000000</td>\n", + " <td>316.000000</td>\n", + " <td>11.000000</td>\n", + " <td>320.000000</td>\n", + " <td>8.000000e-48</td>\n", + " <td>303.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>100.000000</td>\n", + " <td>329.000000</td>\n", + " <td>181.000000</td>\n", + " <td>13.000000</td>\n", + " <td>117.000000</td>\n", + " <td>316.000000</td>\n", + " <td>84.000000</td>\n", + " <td>362.000000</td>\n", + " <td>1.000000e-06</td>\n", + " <td>654.000000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " pident length mismatch gapopen qstart qend \\\n", + "count 176.000000 176.000000 176.000000 176.000000 176.000000 176.000000 \n", + "mean 42.772500 300.250000 149.465909 4.767045 6.948864 299.079545 \n", + "std 12.397842 29.162108 30.489511 2.659161 11.909789 22.140446 \n", + "min 23.000000 61.000000 0.000000 0.000000 1.000000 118.000000 \n", + "25% 34.845000 294.000000 150.000000 2.000000 3.000000 294.000000 \n", + "50% 40.230000 303.000000 159.000000 5.000000 5.000000 
297.000000 \n", + "75% 47.980000 316.000000 164.000000 6.000000 5.000000 316.000000 \n", + "max 100.000000 329.000000 181.000000 13.000000 117.000000 316.000000 \n", + "\n", + " sstart send evalue bitscore \n", + "count 176.000000 176.000000 1.760000e+02 176.000000 \n", + "mean 9.198864 293.971591 6.091102e-09 231.952841 \n", + "std 9.583331 35.601834 7.548480e-08 104.060644 \n", + "min 1.000000 62.000000 0.000000e+00 50.100000 \n", + "25% 4.000000 272.000000 8.750000e-100 167.000000 \n", + "50% 8.000000 302.500000 1.000000e-61 205.000000 \n", + "75% 11.000000 320.000000 8.000000e-48 303.000000 \n", + "max 84.000000 362.000000 1.000000e-06 654.000000 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "superb-papua", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(176, 12)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.shape" + ] + }, + { + "cell_type": "markdown", + "id": "fourth-pennsylvania", + "metadata": {}, + "source": [ + "- Extract the 3rd line from the ``blast_res`` dataframe. Which type of data structure is returned by this extraction?"
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "binding-interest", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "qseqid AK1BA_HUMAN\n", + "sseqid sp|O08782|ALD2_CRIGR\n", + "pident 83.23\n", + "length 316\n", + "mismatch 53\n", + "gapopen 0\n", + "qstart 1\n", + "qend 316\n", + "sstart 1\n", + "send 316\n", + "evalue 0.0\n", + "bitscore 537.0\n", + "Name: 2, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.iloc[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "careful-dining", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(blast_res.iloc[2])" + ] + }, + { + "cell_type": "markdown", + "id": "common-sixth", + "metadata": {}, + "source": [ + "- Extract the *sseqid* column from the ``blast_res`` dataframe. " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "located-waters", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 sp|O60218|AK1BA_HUMAN\n", + "1 sp|C9JRZ8|AK1BF_HUMAN\n", + "2 sp|O08782|ALD2_CRIGR\n", + "3 sp|P45377|ALD2_MOUSE\n", + "4 sp|P21300|ALD1_MOUSE\n", + " ... \n", + "171 sp|P80874|GS69_BACSU\n", + "172 sp|Q56Y42|PLR1_ARATH\n", + "173 sp|P25906|YDBC_ECOLI\n", + "174 sp|C6TBN2|AKR1_SOYBN\n", + "175 sp|P49261|CROB_LEPLU\n", + "Name: sseqid, Length: 176, dtype: object" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.sseqid\n", + "# OR\n", + "blast_res['sseqid']\n", + "# OR\n", + "blast_res.loc[:,'sseqid']" + ] + }, + { + "cell_type": "markdown", + "id": "searching-coach", + "metadata": {}, + "source": [ + "- Get the minimum and maximum values of the *evalue* column."
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "square-airplane", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.evalue.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "innovative-audio", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1e-06" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.evalue.max()" + ] + }, + { + "cell_type": "markdown", + "id": "broad-password", + "metadata": {}, + "source": [ + "- Get the median and the mean of the *bitscore* column." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "tamil-aggregate", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "205.0" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.bitscore.median()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "sitting-metallic", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "231.9528409090909" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.bitscore.mean()" + ] + }, + { + "cell_type": "markdown", + "id": "excessive-tournament", + "metadata": {}, + "source": [ + "- Keep only the hits with a percentage of identity (*pident*) greater than 75%."
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "duplicate-ghana", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.000000e+00</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O08782|ALD2_CRIGR</td>\n", + " <td>83.23</td>\n", + " <td>316</td>\n", + " <td>53</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>537.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P45377|ALD2_MOUSE</td>\n", + 
" <td>82.28</td>\n", + " <td>316</td>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>527.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P21300|ALD1_MOUSE</td>\n", + " <td>79.75</td>\n", + " <td>316</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q5RJP0|ALD1_RAT</td>\n", + " <td>78.16</td>\n", + " <td>316</td>\n", + " <td>69</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>2.000000e-177</td>\n", + " <td>501.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + "1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "2 AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 \n", + "3 AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 \n", + "4 AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 \n", + "5 AK1BA_HUMAN sp|Q5RJP0|ALD1_RAT 78.16 316 69 0 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.000000e+00 654.0 \n", + "1 23 316 51 344 0.000000e+00 559.0 \n", + "2 1 316 1 316 0.000000e+00 537.0 \n", + "3 1 316 1 316 0.000000e+00 527.0 \n", + "4 1 316 1 316 0.000000e+00 515.0 \n", + "5 1 316 1 316 2.000000e-177 501.0 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.loc[blast_res.pident > 75]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "developing-browser", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style 
scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.000000e+00</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O08782|ALD2_CRIGR</td>\n", + " <td>83.23</td>\n", + " <td>316</td>\n", + " <td>53</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>537.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P45377|ALD2_MOUSE</td>\n", + " <td>82.28</td>\n", + " <td>316</td>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " 
<td>0.000000e+00</td>\n", + " <td>527.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P21300|ALD1_MOUSE</td>\n", + " <td>79.75</td>\n", + " <td>316</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q5RJP0|ALD1_RAT</td>\n", + " <td>78.16</td>\n", + " <td>316</td>\n", + " <td>69</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>2.000000e-177</td>\n", + " <td>501.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + "1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "2 AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 \n", + "3 AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 \n", + "4 AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 \n", + "5 AK1BA_HUMAN sp|Q5RJP0|ALD1_RAT 78.16 316 69 0 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.000000e+00 654.0 \n", + "1 23 316 51 344 0.000000e+00 559.0 \n", + "2 1 316 1 316 0.000000e+00 537.0 \n", + "3 1 316 1 316 0.000000e+00 527.0 \n", + "4 1 316 1 316 0.000000e+00 515.0 \n", + "5 1 316 1 316 2.000000e-177 501.0 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# OR \n", + "blast_res.query(\"pident > 75\")" + ] + }, + { + "cell_type": "markdown", + "id": "nonprofit-fitting", + "metadata": {}, + "source": [ + "- Based on the bitscore alone, extract only the best hit(s) (i.e. the hit(s) with the highest bitscore)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "chronic-wallace", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.0</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.0</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.0 316 0 0 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.0 654.0 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Getting the highest bitscore value\n", + "max_bitscore = blast_res.bitscore.max()\n", + "# Extracting all the rows with a bitscore equal to the maximum bitscore\n", + "blast_res.loc[blast_res.bitscore == max_bitscore]" + ] + }, + { + "cell_type": "markdown", + "id": "saving-homeless", + "metadata": {}, + "source": [ + "- Filter in all hits which are corresponding to human hits in the 
database (*sseqid*)." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "western-language", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.000000e+00</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P15121|ALDR_HUMAN</td>\n", + " <td>70.57</td>\n", + " <td>316</td>\n", + " <td>93</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1.000000e-160</td>\n", + " <td>458.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " 
<td>sp|Q96JD6|AKCL2_HUMAN</td>\n", + " <td>54.46</td>\n", + " <td>325</td>\n", + " <td>123</td>\n", + " <td>3</td>\n", + " <td>11</td>\n", + " <td>316</td>\n", + " <td>2</td>\n", + " <td>320</td>\n", + " <td>2.000000e-117</td>\n", + " <td>348.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P51857|AK1D1_HUMAN</td>\n", + " <td>50.79</td>\n", + " <td>317</td>\n", + " <td>151</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>10</td>\n", + " <td>326</td>\n", + " <td>8.000000e-111</td>\n", + " <td>331.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P14550|AK1A1_HUMAN</td>\n", + " <td>48.92</td>\n", + " <td>325</td>\n", + " <td>154</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>316</td>\n", + " <td>3</td>\n", + " <td>325</td>\n", + " <td>4.000000e-106</td>\n", + " <td>319.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P52895|AK1C2_HUMAN</td>\n", + " <td>48.73</td>\n", + " <td>316</td>\n", + " <td>158</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>9.000000e-103</td>\n", + " <td>311.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P17516|AK1C4_HUMAN</td>\n", + " <td>48.10</td>\n", + " <td>316</td>\n", + " <td>160</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>1.000000e-101</td>\n", + " <td>308.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q04828|AK1C1_HUMAN</td>\n", + " <td>48.10</td>\n", + " <td>316</td>\n", + " <td>160</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>1.000000e-101</td>\n", + " <td>308.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " 
<td>sp|P42330|AK1C3_HUMAN</td>\n", + " <td>47.47</td>\n", + " <td>316</td>\n", + " <td>162</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>9.000000e-100</td>\n", + " <td>303.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>161</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q5T2L2|AKCL1_HUMAN</td>\n", + " <td>49.57</td>\n", + " <td>117</td>\n", + " <td>56</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>118</td>\n", + " <td>11</td>\n", + " <td>127</td>\n", + " <td>3.000000e-30</td>\n", + " <td>116.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + "1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "8 AK1BA_HUMAN sp|P15121|ALDR_HUMAN 70.57 316 93 0 \n", + "14 AK1BA_HUMAN sp|Q96JD6|AKCL2_HUMAN 54.46 325 123 3 \n", + "19 AK1BA_HUMAN sp|P51857|AK1D1_HUMAN 50.79 317 151 2 \n", + "25 AK1BA_HUMAN sp|P14550|AK1A1_HUMAN 48.92 325 154 3 \n", + "31 AK1BA_HUMAN sp|P52895|AK1C2_HUMAN 48.73 316 158 2 \n", + "35 AK1BA_HUMAN sp|P17516|AK1C4_HUMAN 48.10 316 160 2 \n", + "36 AK1BA_HUMAN sp|Q04828|AK1C1_HUMAN 48.10 316 160 2 \n", + "45 AK1BA_HUMAN sp|P42330|AK1C3_HUMAN 47.47 316 162 2 \n", + "161 AK1BA_HUMAN sp|Q5T2L2|AKCL1_HUMAN 49.57 117 56 1 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.000000e+00 654.0 \n", + "1 23 316 51 344 0.000000e+00 559.0 \n", + "8 1 316 1 316 1.000000e-160 458.0 \n", + "14 11 316 2 320 2.000000e-117 348.0 \n", + "19 5 316 10 326 8.000000e-111 331.0 \n", + "25 2 316 3 325 4.000000e-106 319.0 \n", + "31 5 316 8 323 9.000000e-103 311.0 \n", + "35 5 316 8 323 1.000000e-101 308.0 \n", + "36 5 316 8 323 1.000000e-101 308.0 \n", + "45 5 316 8 323 9.000000e-100 303.0 \n", + "161 5 118 11 127 3.000000e-30 116.0 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" 
+ } + ], + "source": [ + "# This could be done with list comprehension creating a list of Booleans \n", + "blast_res.loc[[\"HUMAN\" in x for x in blast_res.sseqid]]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "taken-palmer", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O60218|AK1BA_HUMAN</td>\n", + " <td>100.00</td>\n", + " <td>316</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>654.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|C9JRZ8|AK1BF_HUMAN</td>\n", + " <td>91.16</td>\n", + " <td>294</td>\n", + " <td>26</td>\n", + " <td>0</td>\n", + " <td>23</td>\n", + " <td>316</td>\n", + " <td>51</td>\n", + " <td>344</td>\n", + " <td>0.000000e+00</td>\n", + " <td>559.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P15121|ALDR_HUMAN</td>\n", + " <td>70.57</td>\n", + " <td>316</td>\n", + " <td>93</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " 
<td>1.000000e-160</td>\n", + " <td>458.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q96JD6|AKCL2_HUMAN</td>\n", + " <td>54.46</td>\n", + " <td>325</td>\n", + " <td>123</td>\n", + " <td>3</td>\n", + " <td>11</td>\n", + " <td>316</td>\n", + " <td>2</td>\n", + " <td>320</td>\n", + " <td>2.000000e-117</td>\n", + " <td>348.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P51857|AK1D1_HUMAN</td>\n", + " <td>50.79</td>\n", + " <td>317</td>\n", + " <td>151</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>10</td>\n", + " <td>326</td>\n", + " <td>8.000000e-111</td>\n", + " <td>331.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P14550|AK1A1_HUMAN</td>\n", + " <td>48.92</td>\n", + " <td>325</td>\n", + " <td>154</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>316</td>\n", + " <td>3</td>\n", + " <td>325</td>\n", + " <td>4.000000e-106</td>\n", + " <td>319.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P52895|AK1C2_HUMAN</td>\n", + " <td>48.73</td>\n", + " <td>316</td>\n", + " <td>158</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>9.000000e-103</td>\n", + " <td>311.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P17516|AK1C4_HUMAN</td>\n", + " <td>48.10</td>\n", + " <td>316</td>\n", + " <td>160</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>1.000000e-101</td>\n", + " <td>308.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q04828|AK1C1_HUMAN</td>\n", + " <td>48.10</td>\n", + " <td>316</td>\n", + " <td>160</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " 
<td>1.000000e-101</td>\n", + " <td>308.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P42330|AK1C3_HUMAN</td>\n", + " <td>47.47</td>\n", + " <td>316</td>\n", + " <td>162</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>316</td>\n", + " <td>8</td>\n", + " <td>323</td>\n", + " <td>9.000000e-100</td>\n", + " <td>303.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>161</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q5T2L2|AKCL1_HUMAN</td>\n", + " <td>49.57</td>\n", + " <td>117</td>\n", + " <td>56</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>118</td>\n", + " <td>11</td>\n", + " <td>127</td>\n", + " <td>3.000000e-30</td>\n", + " <td>116.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "0 AK1BA_HUMAN sp|O60218|AK1BA_HUMAN 100.00 316 0 0 \n", + "1 AK1BA_HUMAN sp|C9JRZ8|AK1BF_HUMAN 91.16 294 26 0 \n", + "8 AK1BA_HUMAN sp|P15121|ALDR_HUMAN 70.57 316 93 0 \n", + "14 AK1BA_HUMAN sp|Q96JD6|AKCL2_HUMAN 54.46 325 123 3 \n", + "19 AK1BA_HUMAN sp|P51857|AK1D1_HUMAN 50.79 317 151 2 \n", + "25 AK1BA_HUMAN sp|P14550|AK1A1_HUMAN 48.92 325 154 3 \n", + "31 AK1BA_HUMAN sp|P52895|AK1C2_HUMAN 48.73 316 158 2 \n", + "35 AK1BA_HUMAN sp|P17516|AK1C4_HUMAN 48.10 316 160 2 \n", + "36 AK1BA_HUMAN sp|Q04828|AK1C1_HUMAN 48.10 316 160 2 \n", + "45 AK1BA_HUMAN sp|P42330|AK1C3_HUMAN 47.47 316 162 2 \n", + "161 AK1BA_HUMAN sp|Q5T2L2|AKCL1_HUMAN 49.57 117 56 1 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "0 1 316 1 316 0.000000e+00 654.0 \n", + "1 23 316 51 344 0.000000e+00 559.0 \n", + "8 1 316 1 316 1.000000e-160 458.0 \n", + "14 11 316 2 320 2.000000e-117 348.0 \n", + "19 5 316 10 326 8.000000e-111 331.0 \n", + "25 2 316 3 325 4.000000e-106 319.0 \n", + "31 5 316 8 323 9.000000e-103 311.0 \n", + "35 5 316 8 323 1.000000e-101 308.0 \n", + "36 5 316 8 323 1.000000e-101 308.0 \n", + "45 5 316 8 323 9.000000e-100 303.0 
\n", + "161 5 118 11 127 3.000000e-30 116.0 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# But pandas has a specific syntax to perform operations on strings in a Series: the str accessor and its contains method\n", + "blast_res.loc[blast_res.sseqid.str.contains(\"HUMAN\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "tracked-reform", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>qseqid</th>\n", + " <th>sseqid</th>\n", + " <th>pident</th>\n", + " <th>length</th>\n", + " <th>mismatch</th>\n", + " <th>gapopen</th>\n", + " <th>qstart</th>\n", + " <th>qend</th>\n", + " <th>sstart</th>\n", + " <th>send</th>\n", + " <th>evalue</th>\n", + " <th>bitscore</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|O08782|ALD2_CRIGR</td>\n", + " <td>83.23</td>\n", + " <td>316</td>\n", + " <td>53</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>537.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P45377|ALD2_MOUSE</td>\n", + " <td>82.28</td>\n", + " <td>316</td>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>527.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|P21300|ALD1_MOUSE</td>\n", 
+ " <td>79.75</td>\n", + " <td>316</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>0.000000e+00</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>AK1BA_HUMAN</td>\n", + " <td>sp|Q5RJP0|ALD1_RAT</td>\n", + " <td>78.16</td>\n", + " <td>316</td>\n", + " <td>69</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>1</td>\n", + " <td>316</td>\n", + " <td>2.000000e-177</td>\n", + " <td>501.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " qseqid sseqid pident length mismatch gapopen \\\n", + "2 AK1BA_HUMAN sp|O08782|ALD2_CRIGR 83.23 316 53 0 \n", + "3 AK1BA_HUMAN sp|P45377|ALD2_MOUSE 82.28 316 56 0 \n", + "4 AK1BA_HUMAN sp|P21300|ALD1_MOUSE 79.75 316 64 0 \n", + "5 AK1BA_HUMAN sp|Q5RJP0|ALD1_RAT 78.16 316 69 0 \n", + "\n", + " qstart qend sstart send evalue bitscore \n", + "2 1 316 1 316 0.000000e+00 537.0 \n", + "3 1 316 1 316 0.000000e+00 527.0 \n", + "4 1 316 1 316 0.000000e+00 515.0 \n", + "5 1 316 1 316 2.000000e-177 501.0 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "blast_res.query(\"~sseqid.str.contains('HUMAN') & pident > 75\")" + ] + }, + { + "cell_type": "markdown", + "id": "reliable-dream", + "metadata": {}, + "source": [ + "- Plot a histogram of the bitscores. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "suspected-substance", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<AxesSubplot:>" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD7CAYAAABzGc+QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAARxUlEQVR4nO3db2xdd33H8feXpIVQlyah5coKaGZa1K3CozQWf5QJ2YSyQivSBysCAXKnIj8BVLSgkQ5pEw+mZRtF4wGaFgGbJRimKu0StRIsyuohJAbE0OKWtAsDryQt9ihJwV01Fvbdg3tSLv6Te33/2T/3/ZKu7jm/e47P9+vrfHLuz/dcR2YiSSrPC9a7AElSewxwSSqUAS5JhTLAJalQBrgkFcoAl6RCNQ3wiLg6Ih5suP0sIj4UETsj4lhEnKrud/SjYElSXazlfeARsQU4A7wOeD/w08w8FBEHgR2Z+ZHelClJWmqtAf4W4M8yc29EPAaMZuaTETEITGfm1Rfb/8orr8yhoaGOCu6lZ555hssuu2y9y+iKzdLLZukDNk8v9tF/MzMzP8nMq5aOb13j13kn8IVquZaZTwJUIf6ylXaIiAlgAqBWq/Hxj398jYfsn8XFRQYGBta7jK7YLL1slj5g8/RiH/03Njb2nys+kJkt3YBLgZ9QD26Ac0seP9vsa+zZsyc3sgceeGC9S+iazdLLZukjc/P0Yh/9B5zIFTJ1Le9CeSvw7cycr9bnq6kTqvuFdv5nkSS1Zy0B/i5+NX0CcBQYr5bHgSPdKkqS1FxLAR4RLwauB+5pGD4EXB8Rp6rHDnW/PEnSalr6JWZm/jfw0iVjTwH7elGUJKk5r8SUpEIZ4JJUKANckgplgEtSodZ6Jab6aOjg/W3ve2D4PLe2uf/coRvbPq6k/vEMXJIKZYBLUqEMcEkqlAEuSYUywCWpUAa4JBXKAJekQhngklQoA1ySCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVygCXpEIZ4JJUKANckgrVUoBHxPaIuDsiHo2IkxHxhojYGRHHIuJUdb+j18VKkn6l1TPwTwJfzszfBl4NnAQOAsczczdwvFqXJPVJ0wCPiJcAbwQ+A5CZv8jMc8B+YLLabBK4uTclSpJWEpl58Q0irgUOA9+jfvY9A9wOnMnM7Q3bnc3MZdMoETEBTADUarU9U1NT3aq96xYXFxkYGFjvMp4ze+bptvetbYP5Z9vbd3jXFW0ft9s22nPSic3Si33039jY2ExmjiwdbyXAR4B/A/Zm5jci4pPAz4APthLgjUZGRvLEiRPt1N8X09PTjI6OrncZz+n0r9LfObu1rX030l+l32jPSSc2Sy/20X8RsWKAtzIHfho4nZnfqNbvBq4D5iNisPrig8BCt4qVJDXXNMAz88fAjyLi6mpoH/XplKPAeDU2DhzpSYWSpBW1+hr7g8DnI+JS4AfAH1IP/7si4jbgceCW3pQoSVpJSwGemQ8Cy+ZfqJ+NS5LWgVdiSlKhDHBJKpQBLkmFMsAlqVAGuCQVygCXpEIZ4JJUKANckgplgEtSoQxwSSqUAS5JhTLAJalQBrgkFcoAl6RCGeCSVCgDXJIKZYBLUqEMcEkqlAEuSYUywCWpUAa4JBXKAJekQm1tZaOImAN+DvwSOJ+ZIxGxE/giMATMAe/IzLO9KVOStNRazsDHMvPaz
Byp1g8CxzNzN3C8Wpck9UknUyj7gclqeRK4ueNqJEkti8xsvlHED4GzQAJ/l5mHI+JcZm5v2OZsZu5YYd8JYAKgVqvtmZqa6lbtXbe4uMjAwMB6l/Gc2TNPt71vbRvMP9vevsO7rmj7uN220Z6TTmyWXuyj/8bGxmYaZj+e09IcOLA3M5+IiJcBxyLi0VYPnJmHgcMAIyMjOTo62uqufTc9Pc1Gqu/Wg/e3ve+B4fPcOdvq0/vr5t492vZxu22jPSed2Cy92MfG0dIUSmY+Ud0vAPcCrwXmI2IQoLpf6FWRkqTlmgZ4RFwWEZdfWAbeAjwMHAXGq83GgSO9KlKStFwrr7FrwL0RcWH7f8zML0fEt4C7IuI24HHglt6VKUlaqmmAZ+YPgFevMP4UsK8XRUmSmvNKTEkqlAEuSYUywCWpUAa4JBXKAJekQhngklQoA1ySCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVygCXpEIZ4JJUKANckgplgEtSoQxwSSqUAS5JhTLAJalQBrgkFcoAl6RCGeCSVKiWAzwitkTEdyLivmp9Z0Qci4hT1f2O3pUpSVpqLWfgtwMnG9YPAsczczdwvFqXJPVJSwEeES8HbgQ+3TC8H5islieBm7tamSTpoiIzm28UcTfwF8DlwIcz86aIOJeZ2xu2OZuZy6ZRImICmACo1Wp7pqamulV71y0uLjIwMLDeZTxn9szTbe9b2wbzz7a37/CuK9o+brdttOekE5ulF/vov7GxsZnMHFk6vrXZjhFxE7CQmTMRMbrWA2fmYeAwwMjISI6OrvlL9M309DQbqb5bD97f9r4Hhs9z52zTp3dFc+8ebfu43bbRnpNObJZe7GPjaOVf+F7g7RHxNuBFwEsi4nPAfEQMZuaTETEILPSyUEnSr2sa4Jl5B3AHQHUG/uHMfE9E/DUwDhyq7o/0rkw9XwwtedVxYPh8R69EWjV36MaeH0Pqtk7eB34IuD4iTgHXV+uSpD5Z0yRpZk4D09XyU8C+7pckSWqFV2JKUqEMcEkqlAEuSYUywCWpUAa4JBXKAJekQhngklSo9j4s43lm6dWBkrQReAYuSYUywCWpUAa4JBXKAJekQhngklQoA1ySCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVygCXpEIZ4JJUKANckgrVNMAj4kUR8c2IeCgiHomIj1XjOyPiWEScqu539L5cSdIFrZyB/w/wpsx8NXAtcENEvB44CBzPzN3A8WpdktQnTQM86xar1UuqWwL7gclqfBK4uRcFSpJWFpnZfKOILcAM8FvApzLzIxFxLjO3N2xzNjOXTaNExAQwAVCr1fZMTU11q/auW1xcZGBgYNn47Jmn16GaztS2wfyz7e07vOuK7hazBku/1530sRb96Hm1n6/S2Ef/jY2NzWTmyNLxlgL8uY0jtgP3Ah8EvtZKgDcaGRnJEydOtHy8fpuenmZ0dHTZeIl/Uu3A8HnunG3vL+bNHbqxy9W0bun3upM+1qIfPa/281Ua++i/iFgxwNf0LpTMPAdMAzcA8xExWH3xQWCh8zIlSa1q5V0oV1Vn3kTENuDNwKPAUWC82mwcONKjGiVJK2jltekgMFnNg78AuCsz74uIrwN3RcRtwOPALT2sU5K0RNMAz8zvAq9ZYfwpYF8vipIkNeeVmJJUKANckgplgEtSoQxwSSqUAS5JhTLAJalQBrgkFcoAl6RCGeCSVCgDXJIKZYBLUqEMcEkqlAEuSYUywCWpUAa4JBXKAJekQhngklQoA1ySCmWAS1KhDHBJKpQBLkmFMsAlqVBNAzwiXhERD0TEyYh4JCJur8Z3RsSxiDhV3e/ofbmSpAtaOQM/DxzIzN8BXg+8PyKuAQ4CxzNzN3C8Wpck9UnTAM/MJzPz29Xyz4GTwC5gPzBZbTYJ3NyjGiVJK4jMbH3jiCHgq8CrgMczc3vDY2czc9k0SkRMABMAtVptz9TUVIcl987i4iIDAwPLxmfPP
L0O1XSmtg3mn21v3+FdV3S3mDVY+r3upI+16EfPq/18lcY++m9sbGwmM0eWjrcc4BExAPwr8OeZeU9EnGslwBuNjIzkiRMn1lZ5H01PTzM6OrpsfOjg/f0vpkMHhs9z5+zWtvadO3Rjl6tp3dLvdSd9rEU/el7t56s09tF/EbFigLf0LpSIuAT4EvD5zLynGp6PiMHq8UFgoVvFSpKaa+VdKAF8BjiZmZ9oeOgoMF4tjwNHul+eJGk1rbw23Qu8F5iNiAersT8BDgF3RcRtwOPALT2pUJK0oqYBnplfA2KVh/d1txxJUqu8ElOSCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVygCXpEIZ4JJUKANckgplgEtSoQxwSSqUAS5JhTLAJalQBrgkFcoAl6RCGeCSVCgDXJIKZYBLUqEMcEkqlAEuSYUywCWpUAa4JBVqa7MNIuKzwE3AQma+qhrbCXwRGALmgHdk5tnelQlDB+/v5ZcH4MDweW7tw3EkqRtaOQP/B+CGJWMHgeOZuRs4Xq1LkvqoaYBn5leBny4Z3g9MVsuTwM3dLUuS1ExkZvONIoaA+xqmUM5l5vaGx89m5o5V9p0AJgBqtdqeqamptgqdPfN0W/utRW0bzD/b88P0RSe9DO+6orvFrMHS57lfz0k/el5cXGRgYKDnx+k1++i/sbGxmcwcWTredA68U5l5GDgMMDIykqOjo219nX7MTR8YPs+dsz3/lvRFJ73MvXu0u8WswdLnuV/PST96np6ept2f/43EPjaOdt+FMh8RgwDV/UL3SpIktaLdU5ujwDhwqLo/0rWKtO768Y4fSZ1regYeEV8Avg5cHRGnI+I26sF9fUScAq6v1iVJfdT0DDwz37XKQ/u6XIskaQ28ElOSCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVanNcNy4VbL0unJo7dOO6HFfd4xm4JBXKAJekQhngklQoA1ySCmWAS1KhDHBJKpQBLkmFMsAlqVAGuCQVyisxpeepdq8APTB8vuM/Mu5VoN3hGbgkFcoAl6RCOYUi0Z8PlOrG1MNm4Qd4dYdn4JJUKANckgrV0RRKRNwAfBLYAnw6Mw91pSpJ6oHGqZt+T2n1Yvqm7TPwiNgCfAp4K3AN8K6IuKZbhUmSLq6TKZTXAt/PzB9k5i+AKWB/d8qSJDUTmdnejhF/ANyQme+r1t8LvC4zP7Bkuwlgolq9Gnis/XJ77krgJ+tdRJdsll42Sx+weXqxj/77jcy8aulgJ3PgscLYsv8NMvMwcLiD4/RNRJzIzJH1rqMbNksvm6UP2Dy92MfG0ckUymngFQ3rLwee6KwcSVKrOgnwbwG7I+KVEXEp8E7gaHfKkiQ10/YUSmaej4gPAF+h/jbCz2bmI12rbH0UMdXTos3Sy2bpAzZPL/axQbT9S0xJ0vrySkxJKpQBLkmFel4FeER8NiIWIuLhhrGdEXEsIk5V9zsaHrsjIr4fEY9FxO+vT9XLRcQrIuKBiDgZEY9ExO3VeFG9RMSLIuKbEfFQ1cfHqvGi+rggIrZExHci4r5qvdQ+5iJiNiIejIgT1VipvWyPiLsj4tHq38sbSu1lRZn5vLkBbwSuAx5uGPsr4GC1fBD4y2r5GuAh4IXAK4H/ALasdw9VbYPAddXy5cC/V/UW1Qv1awkGquVLgG8Ary+tj4Z+/gj4R+C+Un+2qvrmgCuXjJXayyTwvmr5UmB7qb2s2N96F7AOT+jQkgB/DBislgeBx6rlO4A7Grb7CvCG9a5/lZ6OANeX3AvwYuDbwOtK7IP6dRDHgTc1BHhxfVT1rBTgxfUCvAT4IdWbNUruZbXb82oKZRW1zHwSoLp/WTW+C/hRw3anq7ENJSKGgNdQP3strpdq2uFBYAE4lplF9gH8DfDHwP81jJXYB9SvqP7niJipPgoDyuzlN4H/Av6+mtr6dERcRpm9rMgAX11LHxWwniJiAPgS8KHM/NnFNl1hbEP0kpm/zMxrqZ/Bv
jYiXnWRzTdkHxFxE7CQmTOt7rLC2Lr30WBvZl5H/ZNG3x8Rb7zIthu5l63Up0z/NjNfAzxDfcpkNRu5lxUZ4DAfEYMA1f1CNb6hPyogIi6hHt6fz8x7quEiewHIzHPANHAD5fWxF3h7RMxR/1TON0XE5yivDwAy84nqfgG4l/onj5bYy2ngdPWqDuBu6oFeYi8rMsDrl/+PV8vj1OeTL4y/MyJeGBGvBHYD31yH+paJiAA+A5zMzE80PFRULxFxVURsr5a3AW8GHqWwPjLzjsx8eWYOUf9IiX/JzPdQWB8AEXFZRFx+YRl4C/AwBfaSmT8GfhQRV1dD+4DvUWAvq1rvSfh+3oAvAE8C/0v9f9vbgJdS/+XTqep+Z8P2H6X+m+jHgLeud/0Ndf0e9Zd23wUerG5vK60X4HeB71R9PAz8aTVeVB9LehrlV7/ELK4P6vPGD1W3R4CPltpLVdu1wInqZ+yfgB2l9rLSzUvpJalQTqFIUqEMcEkqlAEuSYUywCWpUAa4JBXKAJekQhngklSo/webY0sHvXRGowAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "blast_res[\"bitscore\"].hist()" + ] + }, + { + "cell_type": "markdown", + "id": "attempted-development", + "metadata": {}, + "source": [ + "- Plot a barplot of the number of hits per species (species are considered the last code after the \"_\" in the sseqid column)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "swiss-provider", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>sp|O60218|AK1BA</td>\n", + " <td>HUMAN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>sp|C9JRZ8|AK1BF</td>\n", + " <td>HUMAN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>sp|O08782|ALD2</td>\n", + " <td>CRIGR</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>3</th>\n", + " <td>sp|P45377|ALD2</td>\n", + " <td>MOUSE</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>sp|P21300|ALD1</td>\n", + " <td>MOUSE</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>sp|P80874|GS69</td>\n", + " <td>BACSU</td>\n", + " </tr>\n", + " <tr>\n", + " <th>172</th>\n", + " <td>sp|Q56Y42|PLR1</td>\n", + " <td>ARATH</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>sp|P25906|YDBC</td>\n", + " <td>ECOLI</td>\n", + " </tr>\n", + " <tr>\n", + " <th>174</th>\n", + " <td>sp|C6TBN2|AKR1</td>\n", + " <td>SOYBN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>175</th>\n", + " <td>sp|P49261|CROB</td>\n", + " <td>LEPLU</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>176 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1\n", + "0 sp|O60218|AK1BA HUMAN\n", + "1 sp|C9JRZ8|AK1BF HUMAN\n", + "2 sp|O08782|ALD2 CRIGR\n", + "3 sp|P45377|ALD2 MOUSE\n", + "4 sp|P21300|ALD1 MOUSE\n", + ".. ... 
...\n", + "171 sp|P80874|GS69 BACSU\n", + "172 sp|Q56Y42|PLR1 ARATH\n", + "173 sp|P25906|YDBC ECOLI\n", + "174 sp|C6TBN2|AKR1 SOYBN\n", + "175 sp|P49261|CROB LEPLU\n", + "\n", + "[176 rows x 2 columns]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# First extract the species information from the sseqid column\n", + "hits_by_sp = blast_res.sseqid.str.split(\"_\", expand=True)\n", + "hits_by_sp" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "russian-mystery", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<AxesSubplot:>" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Then count their occurrences and do the barplot\n", + "hits_by_sp.loc[:, 1].value_counts().plot(kind=\"bar\")" + ] + }, + { + "cell_type": "markdown", + "id": "reliable-shark", + "metadata": {}, + "source": [ + "# Extra exercise" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fabulous-endorsement", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "boxed-basin", + "metadata": {}, + "source": [ + "read the 'data/city_temperature.csv'\n", + "\n", + "force the City datatype to string by passing\n", + "```\n", + "dtype={'City': str}\n", + "```\n", + "As argument to the function to read the file.<br />\n", + "Don't worry about the warning: it is due to the State column, which contains NaN for non-US countries, but we do not use these data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "positive-gateway", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/home/bneron/Projects/MNE/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3165: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False.\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" + ] + } + ], + "source": [ + "world = pd.read_csv('data/city_temperature.csv' , sep=',', dtype={'City': str})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "noble-economics", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Region', 'Country', 'State', 'City', 'Month', 'Day', 'Year',\n", + " 'AvgTemperature'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "world.columns" + ] + }, + { + "cell_type": "markdown", + "id": "international-glenn", + "metadata": {}, + "source": [ + "We will work only on the Europe Region, so create a dataset named europe with only these data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "exciting-founder", + "metadata": {}, + "outputs": [], + "source": [ + "europe = world[world['Region'] == 'Europe']" + ] + }, + { + "cell_type": "markdown", + "id": "dressed-carbon", + "metadata": {}, + "source": [ + "Which countries are in Europe?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "crude-pillow", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Albania', 'Austria', 'Belarus', 'Belgium', 'Bulgaria', 'Croatia',\n", + " 'Cyprus', 'Czech Republic', 'Denmark', 'Finland', 'France',\n", + " 'Germany', 'Georgia', 'Greece', 'Hungary', 'Iceland', 'Ireland',\n", + " 'Italy', 'Latvia', 'Macedonia', 'The Netherlands', 'Norway',\n", + " 'Poland', 'Portugal', 'Romania', 'Russia', 'Serbia-Montenegro',\n", + " 'Slovakia', 'Spain', 'Sweden', 'Switzerland', 'Ukraine',\n", + " 'United Kingdom', 'Yugoslavia'], dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "europe.Country.unique()" + ] + }, + { + "cell_type": "markdown", + "id": "dated-guest", + "metadata": {}, + "source": [ + "remove columns 'Region' and 'State' from the data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "valued-minutes", + "metadata": {}, + "outputs": [], + "source": [ + "europe = europe[['Country', 'City', 'Month', 'Day', 'Year', 'AvgTemperature']]" + ] + }, + { + "cell_type": "markdown", + "id": "million-blank", + "metadata": {}, + "source": [ + "from europe data create a new dataset containing countries: 'France', 'Spain', 'Italy'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "textile-proof", + "metadata": {}, + "outputs": [], + "source": [ + "fr_sp_it = europe[europe['Country'].isin(['France', 'Spain', 'Italy'])]" + ] + }, + { + "cell_type": "markdown", + "id": "statutory-hierarchy", + "metadata": {}, + "source": [ + "group the data on 'City' and 'Year' compute the mean of each group and keep only the 'AvgTemperature' column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "induced-finish", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Year\n", + "Barcelona 1995 62.019178\n", + " 1996 61.125956\n", + " 1997 62.612329\n", + " 1998 60.273973\n", + " 1999 61.204658\n", + " ... \n", + "Rome 2016 61.185246\n", + " 2017 61.377808\n", + " 2018 60.821370\n", + " 2019 59.215068\n", + " 2020 52.676119\n", + "Name: AvgTemperature, Length: 182, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fr_sp_it_mean = fr_sp_it.groupby(['City', 'Year']).mean()['AvgTemperature']\n", + "fr_sp_it_mean" + ] + }, + { + "cell_type": "markdown", + "id": "bibliographic-bidding", + "metadata": {}, + "source": [ + "do the same but compute the standard deviation" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "valued-smooth", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City Year\n", + "Barcelona 1995 9.569756\n", + " 1996 9.420765\n", + " 1997 9.827235\n", + " 1998 19.750126\n", + " 1999 13.904526\n", + " ... 
\n", + "Rome 2016 15.914193\n", + " 2017 11.916595\n", + " 2018 20.327932\n", + " 2019 23.514064\n", + " 2020 6.224294\n", + "Name: AvgTemperature, Length: 182, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fr_sp_it_std = fr_sp_it.groupby(['City', 'Year']).std()['AvgTemperature']\n", + "fr_sp_it_std" + ] + }, + { + "cell_type": "markdown", + "id": "outdoor-content", + "metadata": {}, + "source": [ + "* reset the index of the mean data and std data\n", + "* rename the column AvgTemperature to Tmp on the mean data\n", + "* rename the column AvgTemperature to std on the std data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "dangerous-republican", + "metadata": {}, + "outputs": [], + "source": [ + "data_mean = fr_sp_it_mean.reset_index()\n", + "data_mean.columns = ['City', 'Year', 'Tmp']\n", + "data_std = fr_sp_it_std.reset_index()\n", + "data_std.columns = ['City', 'Year', 'std']" + ] + }, + { + "cell_type": "markdown", + "id": "equivalent-grove", + "metadata": {}, + "source": [ + "merge the two tables data_mean and data_std" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "appreciated-europe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>City</th>\n", + " <th>Year</th>\n", + " <th>Tmp</th>\n", + " <th>std</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Barcelona</td>\n", + " <td>1995</td>\n", + " <td>62.019178</td>\n", + " <td>9.569756</td>\n", + " </tr>\n", 
+ " <tr>\n", + " <th>1</th>\n", + " <td>Barcelona</td>\n", + " <td>1996</td>\n", + " <td>61.125956</td>\n", + " <td>9.420765</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Barcelona</td>\n", + " <td>1997</td>\n", + " <td>62.612329</td>\n", + " <td>9.827235</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Barcelona</td>\n", + " <td>1998</td>\n", + " <td>60.273973</td>\n", + " <td>19.750126</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Barcelona</td>\n", + " <td>1999</td>\n", + " <td>61.204658</td>\n", + " <td>13.904526</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>177</th>\n", + " <td>Rome</td>\n", + " <td>2016</td>\n", + " <td>61.185246</td>\n", + " <td>15.914193</td>\n", + " </tr>\n", + " <tr>\n", + " <th>178</th>\n", + " <td>Rome</td>\n", + " <td>2017</td>\n", + " <td>61.377808</td>\n", + " <td>11.916595</td>\n", + " </tr>\n", + " <tr>\n", + " <th>179</th>\n", + " <td>Rome</td>\n", + " <td>2018</td>\n", + " <td>60.821370</td>\n", + " <td>20.327932</td>\n", + " </tr>\n", + " <tr>\n", + " <th>180</th>\n", + " <td>Rome</td>\n", + " <td>2019</td>\n", + " <td>59.215068</td>\n", + " <td>23.514064</td>\n", + " </tr>\n", + " <tr>\n", + " <th>181</th>\n", + " <td>Rome</td>\n", + " <td>2020</td>\n", + " <td>52.676119</td>\n", + " <td>6.224294</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>182 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " City Year Tmp std\n", + "0 Barcelona 1995 62.019178 9.569756\n", + "1 Barcelona 1996 61.125956 9.420765\n", + "2 Barcelona 1997 62.612329 9.827235\n", + "3 Barcelona 1998 60.273973 19.750126\n", + "4 Barcelona 1999 61.204658 13.904526\n", + ".. ... ... ... 
...\n", + "177 Rome 2016 61.185246 15.914193\n", + "178 Rome 2017 61.377808 11.916595\n", + "179 Rome 2018 60.821370 20.327932\n", + "180 Rome 2019 59.215068 23.514064\n", + "181 Rome 2020 52.676119 6.224294\n", + "\n", + "[182 rows x 4 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_data = pd.merge(data_mean, data_std, on=['City', 'Year'])\n", + "clean_data" + ] + }, + { + "cell_type": "markdown", + "id": "asian-evanescence", + "metadata": {}, + "source": [ + "save the data in a file" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "analyzed-beaver", + "metadata": {}, + "outputs": [], + "source": [ + "clean_data.to_csv('data/fr_sp_it_temp.tsv', sep='\\t')" + ] + }, + { + "cell_type": "markdown", + "id": "elect-percentage", + "metadata": {}, + "source": [ + "# Teasing\n", + "\n", + "a quick data plotting. we will improve it in matplotlib course" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "beneficial-coordinator", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, 
+ "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "for city, df in clean_data.groupby('City'):\n", + " df.plot('Year', 'Tmp', label=city)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "neither-popularity", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/pandas_cours.ipynb b/notebooks/pandas_cours.ipynb new file mode 100644 index 0000000..cca2333 --- /dev/null +++ b/notebooks/pandas_cours.ipynb @@ -0,0 +1,7757 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "lesser-criticism", + "metadata": {}, + "source": [ + "# <center>**Cours**</center>\n", + "\n", + "<div style=\"text-align:center\">\n", + " <img src=\"images/pandas_logo.svg\" width=\"600px\">\n", + " <div>\n", + " Bertrand Néron, François Laurent, Etienne Kornobis\n", + " <br />\n", + " <a href=\"https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/\">Bioinformatics and Biostatistics HUB</a>\n", + " <br />\n", + " © Institut Pasteur, 2021\n", + " </div> \n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "id": "attempted-certificate", + "metadata": {}, + "source": [ + "# Intro\n", + "\n", + "**Pandas** is a library to manipulate data 
structures and perform data analysis and visualization. Pandas is built on top of **Numpy**, a widely used library for mathematical operations, particularly on arrays and matrices. Pandas helps with the whole data analysis stack, including data cleaning/formatting followed by analysis and visualization.\n", + "\n", + "Pandas is particularly well suited to deal with tabular data which can be imported from different formats such as **csv**, **tsv** or even **xlsx**.\n", + "\n", + "The two primary data structures in pandas are **Series** and **DataFrames**.\n", + "\n", + "Pandas is designed to manipulate tabulated data, Numpy is designed to do computation on arrays. So here are the differences: \n", + "\n", + "**Numpy** \n", + "* handles one structure: the ndarray.\n", + "* an *array* can have 1, 2 or more dimensions.\n", + "* A *ndarray* handles homogeneous data, only one datatype in an array.\n", + "* So numpy is mostly used to do math on arrays.\n", + "\n", + "**Pandas** \n", + "* *Series* have 1 dimension, *DataFrame* have 2 dimensions.\n", + "* *Pandas* does **not** handle structures with more than 2 dimensions.\n", + "* But a *DataFrame* can contain heterogeneous data, each column can have a different datatype.\n", + "* *Pandas* is more powerful to query data or manipulate them.\n", + "\n", + "So *Numpy* is mostly used to do math, *Pandas* to explore data structured in tables. 
" + ] + }, + { + "cell_type": "markdown", + "id": "angry-banking", + "metadata": {}, + "source": [ + "# Installation\n", + "\n", + "For *conda* users\n", + "\n", + "```shell\n", + "conda install pandas\n", + "```\n", + "\n", + "for *pip* users\n", + "```shell\n", + "pip install pandas\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "british-currency", + "metadata": {}, + "source": [ + "# Import Convention" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "proud-coffee", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "english-subdivision", + "metadata": {}, + "source": [ + "# Series\n", + "\n", + "A Series is a one-dimensional array with axis labels. Labels do not need to be\n", + "unique but must be hashable.\n", + "\n", + "To create a series, use the pandas `Series` object and specify a list or tuple\n", + "of value to feed your serie with as the first argument:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "outer-brass", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_nolabel = pd.Series([1,2,3])\n", + "type(serie_nolabel)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "executive-right", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_nolabel" + ] + }, + { + "cell_type": "markdown", + "id": "personal-cleaners", + "metadata": {}, + "source": [ + "You can specify the labels of your Series by providing a list of labels as\n", + "for the `index` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "spatial-disposal", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 1\n", + "B 2\n", + "C 3\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label = pd.Series([1,2,3], index=['A', 'B', 'C'])\n", + "serie_label" + ] + }, + { + "cell_type": "markdown", + "id": "reduced-retention", + "metadata": {}, + "source": [ + "And we can access these indices with the `index` property:" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "classical-sapphire", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=3, step=1)" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_nolabel.index" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "known-absorption", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B', 'C'], dtype='object')" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.index" + ] + }, + { + "cell_type": "markdown", + "id": "amateur-secret", + "metadata": {}, + "source": [ + "## Indexing/Slicing\n", + "\n", + "In order to subset a serie based on an **integer index**, you can use the `iloc` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "exact-accuracy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_nolabel.iloc[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "hairy-inspiration", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.iloc[1]" + ] + }, + { + "cell_type": "code", + 
"execution_count": 106, + "id": "social-extra", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 1\n", + "B 2\n", + "dtype: int64" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.iloc[0:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "diagnostic-flood", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "C 3\n", + "B 2\n", + "A 1\n", + "dtype: int64" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.iloc[::-1]" + ] + }, + { + "cell_type": "markdown", + "id": "mysterious-airline", + "metadata": {}, + "source": [ + "Most commonly, You can use **labels** as well for subsetting, using the `loc` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "private-profession", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.loc[\"B\"]" + ] + }, + { + "cell_type": "markdown", + "id": "forbidden-conjunction", + "metadata": {}, + "source": [ + "**WARNING**: With `loc`, the value is interpreted as a label of the\n", + " index, and **never** as an integer position along the index, there is `iloc` for this.\n", + " \n", + "When index labels are strings, you can as well access the corresponding value using this simple syntax `.LABEL_VALUE`" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "hawaiian-fever", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.A" + ] + }, + { + "cell_type": "markdown", + "id": "prescribed-literature", + "metadata": {}, + "source": [ + "Serie objects benefit from many attributes and methods (see [pandas 
documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html)), many of them being common with pandas DataFrames. We will see some of the ones listed below in action in the DataFrame section of this course.\n", + "\n", + "Here are some attributes of interest:\n", + "\n", + "|Attribute|Action|\n", + "|-|-|\n", + "|index|Return the index (0 axis labels) of the Serie|\n", + "|name|Return the name of the Serie|\n", + "|shape|Return a tuple of the shape of the underlying data|\n", + "\n", + "And some useful methods:\n", + "\n", + "|Method|Action|\n", + "|-|-|\n", + "|aggregate|Aggregate using one or more operations over the specified axis|\n", + "|all|Return whether all elements are True potentially over an axis|\n", + "|any|Return whether any element is True potentially over an axis|\n", + "|apply|Invoke function on values of Series|\n", + "|astype|Cast a pandas object to a specified dtype|\n", + "|copy|Make a copy of this object’s indices and data|\n", + "|count|Return number of non-NA/null observations in the Series|\n", + "|describe|Generate descriptive statistics that summarize the central tendency, dispersion and shape of a dataset’s distribution, excluding NaN values|\n", + "|drop|Return Series with specified index labels removed|\n", + "|groupby|Group DataFrame or Series using a mapper or by a Series of columns|\n", + "|head / tail|Return the first / last n rows|\n", + "|max, min, median, mean, sum|Perform the corresponding operation on the Serie|\n", + "|plot|Plot graphs from Serie/DataFrame|\n", + "|reset_index|Generate a new DataFrame or Series with the index reset|\n", + "|sort_values|Sort by the values|\n", + "|str|String methods for series|\n", + "|to_csv, to_excel|Export to csv or excel file|\n", + "|unique|Return unique values of Series object|\n", + "|value_counts|Return a Series containing counts of unique values|\n" + ] + }, + { + "cell_type": "markdown", + "id": "precious-green", + "metadata": {}, + 
"source": [ + "## Operations on Series\n", + "\n", + "Comparison operators (ie `==`, `<`, `<=`, `>=`, `>`) can be used on Series as well as DataFrames for subsetting.\n", + "\n", + "For example, we want to see which values are superior to one in our previous Serie:" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "optimum-drama", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A False\n", + "B True\n", + "C True\n", + "dtype: bool" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label > 1" + ] + }, + { + "cell_type": "markdown", + "id": "twenty-planet", + "metadata": {}, + "source": [ + "Since `loc` can take list or Series of booleans as input, we can then apply this Boolean Serie as a mask for our Serie:" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "universal-responsibility", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "B 2\n", + "C 3\n", + "dtype: int64" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label.loc[serie_label>1]" + ] + }, + { + "cell_type": "markdown", + "id": "pressed-clark", + "metadata": {}, + "source": [ + "## Operations between Series" + ] + }, + { + "cell_type": "markdown", + "id": "thick-meter", + "metadata": {}, + "source": [ + "Operations (ie `+`, `-`, `*`, `/`) between Series will trigger an alignment of the values\n", + "based on the index values:" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "departmental-creature", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 2\n", + "B 4\n", + "C 6\n", + "dtype: int64" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label + serie_label" + ] + }, + { + "cell_type": "markdown", + "id": "regulation-listening", + "metadata": {}, + "source": [ + "We can 
see here that the labels are aligned prior to the operation" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "electric-cherry", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 2\n", + "B 4\n", + "C 6\n", + "dtype: int64" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_label + serie_label.iloc[::-1]" + ] + }, + { + "cell_type": "markdown", + "id": "positive-batman", + "metadata": {}, + "source": [ + "# DataFrames\n", + "\n", + "A pandas DataFrame is a two-dimensional data structure with axis labels. Labels do not need to be unique but must be hashable. DataFrames in pandas are like dictionary containers of Series objects.\n", + "\n", + "## DataFrame Terminology\n", + "\n", + "<img src=\"images/pandas_dataframe.png\" width=\"300px\" />\n", + "\n", + "## Create a DataFrame\n", + "\n", + "[DataFrames](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) in pandas are rarely created from scratch. One common approach is to create a pandas DataFrame from a dictionary or a file, but you can as well create them from a list of lists or numpy ndarrays. 
\n", + "\n", + "### From a list of lists:" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "following-houston", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>6</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C\n", + "a 1 2 3\n", + "b 4 5 6" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame([[1,2,3],\n", + " [4,5,6]],\n", + " columns=['A', 'B', 'C'],\n", + " index= ['a', 'b'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "personalized-kennedy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b'], dtype='object')" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "conceptual-boards", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B', 'C'], dtype='object')" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "id": "agricultural-spotlight", + 
"metadata": {}, + "source": [ + "### From a numpy ndarray" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "minor-korean", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2\n", + "0 0 1 2\n", + "1 3 4 5\n", + "2 6 7 8\n", + "3 9 10 11" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.arange(12).reshape(4,3))\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "still-commissioner", + "metadata": {}, + "source": [ + "- From a dictionnary" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "intellectual-wilson", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: 
right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>6</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 1 4\n", + "1 2 5\n", + "2 3 6" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'A': [1,2,3],\n", + " 'B': np.arange(4,7),\n", + " })\n", + " \n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "international-checkout", + "metadata": {}, + "source": [ + "- From a file, many options are available, to name only a few:\n", + " - [pd.read_csv](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)\n", + " - [pd.read_excel](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html)\n", + " - [pd.read_html](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_html.html)\n", + " \n", + "NB: For excel and html imports, you might need to install extra libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bronze-prayer", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "titanic = pd.read_csv(\"data/titanic.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "laden-composer", + "metadata": {}, + "source": [ + "We want to open *data/bar_data.tsv* file but the 2 first lines are comments and the separator between fields is *tab*\n", + "\n", + "See below the 5 first lines (using the `!` jupyter magic for bash subprocesses)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "grave-party", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# generated with fooo software version 12bis\n", + "# 2021/02/31\n", + "cond1\tcond2\tcond3\tcontrol\n", + "14.644417316782045\t2.9453091400880465\t24.81171864537413\t5.114340165446571\n", + "12.071043262601615\t4.406424332565544\t21.574601309211538\t2.5071180945299716\n" + ] + } + ], + "source": [ + "! head -5 data/bar_data.tsv" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "historical-ivory", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>cond1</th>\n", + " <th>cond2</th>\n", + " <th>cond3</th>\n", + " <th>control</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>14.644417</td>\n", + " <td>2.945309</td>\n", + " <td>24.811719</td>\n", + " <td>5.114340</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12.071043</td>\n", + " <td>4.406424</td>\n", + " 
<td>21.574601</td>\n", + " <td>2.507118</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>8.227469</td>\n", + " <td>3.185252</td>\n", + " <td>20.651623</td>\n", + " <td>4.449593</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>8.980799</td>\n", + " <td>9.233560</td>\n", + " <td>24.859737</td>\n", + " <td>4.127919</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>9.080359</td>\n", + " <td>5.629192</td>\n", + " <td>18.443504</td>\n", + " <td>4.268572</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " cond1 cond2 cond3 control\n", + "0 14.644417 2.945309 24.811719 5.114340\n", + "1 12.071043 4.406424 21.574601 2.507118\n", + "2 8.227469 3.185252 20.651623 4.449593\n", + "3 8.980799 9.233560 24.859737 4.127919\n", + "4 9.080359 5.629192 18.443504 4.268572" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = pd.read_csv(\"data/bar_data.tsv\", sep=\"\\t\", comment=\"#\")\n", + "bar.head()" + ] + }, + { + "cell_type": "markdown", + "id": "bacterial-irrigation", + "metadata": {}, + "source": [ + "If the data in the file are already indexed like in this one:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "supported-health", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tMW\tAlogP\tPSA\tHBA\n", + "0\t0.0\t1.0\t72.73111270481336\t1.1416684150966834\n", + "1\t3.63\t544.59\t391.4275648686457\t0.9848635571682688\n", + "2\t2.11\t383.4\t437.4589821943501\t15.040385372412596\n", + "3\t1.24\t162.23\t480.1112629835199\t11.401906578750385\n" + ] + } + ], + "source": [ + "! 
head -5 data/data_for_plt.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "discrete-anaheim", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>MW</th>\n", + " <th>AlogP</th>\n", + " <th>PSA</th>\n", + " <th>HBA</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0.00</td>\n", + " <td>1.00</td>\n", + " <td>72.731113</td>\n", + " <td>1.141668</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>3.63</td>\n", + " <td>544.59</td>\n", + " <td>391.427565</td>\n", + " <td>0.984864</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>2.11</td>\n", + " <td>383.40</td>\n", + " <td>437.458982</td>\n", + " <td>15.040385</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 MW AlogP PSA HBA\n", + "0 0 0.00 1.00 72.731113 1.141668\n", + "1 1 3.63 544.59 391.427565 0.984864\n", + "2 2 2.11 383.40 437.458982 15.040385" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"data/data_for_plt.csv\", sep=\"\\t\")\n", + "data.head(3)" + ] + }, + { + "cell_type": "markdown", + "id": "latest-public", + "metadata": {}, + "source": [ + "To avoiding to have an extra column, you can specify which columns to use as index.\n", + "This column **must** have distincts values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "casual-buying", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>MW</th>\n", + " <th>AlogP</th>\n", + " <th>PSA</th>\n", + " <th>HBA</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.00</td>\n", + " <td>1.00</td>\n", + " <td>72.731113</td>\n", + " <td>1.141668</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3.63</td>\n", + " <td>544.59</td>\n", + " <td>391.427565</td>\n", + " <td>0.984864</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2.11</td>\n", + " <td>383.40</td>\n", + " <td>437.458982</td>\n", + " <td>15.040385</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1.24</td>\n", + " <td>162.23</td>\n", + " <td>480.111263</td>\n", + " <td>11.401907</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-1.37</td>\n", + " <td>361.37</td>\n", + " <td>448.864769</td>\n", + " <td>5.732690</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " MW AlogP PSA HBA\n", + "0 0.00 1.00 72.731113 1.141668\n", + "1 3.63 544.59 391.427565 0.984864\n", + "2 2.11 383.40 437.458982 15.040385\n", + "3 1.24 162.23 480.111263 11.401907\n", + "4 -1.37 361.37 448.864769 5.732690" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"data/data_for_plt.csv\", sep=\"\\t\", index_col=0)\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "commercial-system", 
+ "metadata": {}, + "source": [ + "The first line is used as header.<br />\n", + "So you can specify the number of the row which represents the header,\n", + "or you can set this parameter to None if the table has no header." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "golden-myrtle", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.00</td>\n", + " <td>1.00</td>\n", + " <td>72.731113</td>\n", + " <td>1.141668</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3.63</td>\n", + " <td>544.59</td>\n", + " <td>391.427565</td>\n", + " <td>0.984864</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2.11</td>\n", + " <td>383.40</td>\n", + " <td>437.458982</td>\n", + " <td>15.040385</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1.24</td>\n", + " <td>162.23</td>\n", + " <td>480.111263</td>\n", + " <td>11.401907</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-1.37</td>\n", + " <td>361.37</td>\n", + " <td>448.864769</td>\n", + " <td>5.732690</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 1 2 3 4\n", + "0 \n", + "0 0.00 1.00 72.731113 1.141668\n", + "1 3.63 544.59 391.427565 0.984864\n", + "2 2.11 383.40 437.458982 15.040385\n", + "3 1.24 162.23 
480.111263 11.401907\n", + "4 -1.37 361.37 448.864769 5.732690" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"data/no_header.tsv\", sep=\"\\t\", index_col=0, header=None)\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "thorough-worth", + "metadata": {}, + "source": [ + "## Characterizing a DataFrame\n", + "\n", + "Several DataFrame attributes and methods are provided to characterize your dataset. Here is a subset of the most commonly used ones." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "still-pepper", + "metadata": {}, + "outputs": [], + "source": [ + "titanic = pd.read_csv(\"data/titanic.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "impossible-security", + "metadata": {}, + "source": [ + "`shape` to get the dimensions of the dataframe (ie number of rows, number of columns):" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "nutritional-andrews", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The titanic dataset is 891 length\n", + "The titanic dataset contains 891 rows x 12 columns\n" + ] + } + ], + "source": [ + "print(f\"The titanic dataset is {len(titanic)} length\")\n", + "rows, cols = titanic.shape\n", + "print(f\"The titanic dataset contains {rows} rows x {cols} columns\")" + ] + }, + { + "cell_type": "markdown", + "id": "empirical-prospect", + "metadata": {}, + "source": [ + "`head` to get the first lines of your dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ancient-gravity", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " 
}\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. 
William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "powered-navigator", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.head(n=2)" + ] + }, + { + "cell_type": "markdown", + "id": "vocal-pencil", + "metadata": {}, + "source": [ + "`tail` to get the last lines of your dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "blessed-family", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " 
<th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>889</th>\n", + " <td>890</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Behr, Mr. Karl Howell</td>\n", + " <td>male</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>111369</td>\n", + " <td>30.00</td>\n", + " <td>C148</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>890</th>\n", + " <td>891</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Dooley, Mr. Patrick</td>\n", + " <td>male</td>\n", + " <td>32.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>370376</td>\n", + " <td>7.75</td>\n", + " <td>NaN</td>\n", + " <td>Q</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name Sex Age SibSp \\\n", + "889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 891 0 3 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "889 0 111369 30.00 C148 C \n", + "890 0 370376 7.75 NaN Q " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.tail(2)" + ] + }, + { + "cell_type": "markdown", + "id": "molecular-messaging", + "metadata": {}, + "source": [ + "`describe` to have basic descriptive statistics. 
The columns on which pandas cannot do statistics are omitted (Name, Sex, ...)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "touched-lawsuit", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>891.000000</td>\n", + " <td>891.000000</td>\n", + " <td>891.000000</td>\n", + " <td>714.000000</td>\n", + " <td>891.000000</td>\n", + " <td>891.000000</td>\n", + " <td>891.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>446.000000</td>\n", + " <td>0.383838</td>\n", + " <td>2.308642</td>\n", + " <td>29.699118</td>\n", + " <td>0.523008</td>\n", + " <td>0.381594</td>\n", + " <td>32.204208</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>257.353842</td>\n", + " <td>0.486592</td>\n", + " <td>0.836071</td>\n", + " <td>14.526497</td>\n", + " <td>1.102743</td>\n", + " <td>0.806057</td>\n", + " <td>49.693429</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>1.000000</td>\n", + " <td>0.000000</td>\n", + " <td>1.000000</td>\n", + " <td>0.420000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>223.500000</td>\n", + " <td>0.000000</td>\n", + " <td>2.000000</td>\n", + " <td>20.125000</td>\n", + " <td>0.000000</td>\n", + " 
<td>0.000000</td>\n", + " <td>7.910400</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>446.000000</td>\n", + " <td>0.000000</td>\n", + " <td>3.000000</td>\n", + " <td>28.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>14.454200</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>668.500000</td>\n", + " <td>1.000000</td>\n", + " <td>3.000000</td>\n", + " <td>38.000000</td>\n", + " <td>1.000000</td>\n", + " <td>0.000000</td>\n", + " <td>31.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>891.000000</td>\n", + " <td>1.000000</td>\n", + " <td>3.000000</td>\n", + " <td>80.000000</td>\n", + " <td>8.000000</td>\n", + " <td>6.000000</td>\n", + " <td>512.329200</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Age SibSp \\\n", + "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n", + "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n", + "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n", + "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n", + "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n", + "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n", + "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n", + "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n", + "\n", + " Parch Fare \n", + "count 891.000000 891.000000 \n", + "mean 0.381594 32.204208 \n", + "std 0.806057 49.693429 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 7.910400 \n", + "50% 0.000000 14.454200 \n", + "75% 0.000000 31.000000 \n", + "max 6.000000 512.329200 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "desc = titanic.describe()\n", + "desc" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "monthly-plasma", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "titanic have 12 
cols\n", + "desc have 7 cols\n" + ] + } + ], + "source": [ + "print(f\"titanic have {len(titanic.columns)} cols\\ndesc have {len(desc.columns)} cols\")" + ] + }, + { + "cell_type": "markdown", + "id": "designing-tuning", + "metadata": {}, + "source": [ + "`median` to get the median by columns with numerical values:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "becoming-living", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId 446.0000\n", + "Survived 0.0000\n", + "Pclass 3.0000\n", + "Age 28.0000\n", + "SibSp 0.0000\n", + "Parch 0.0000\n", + "Fare 14.4542\n", + "dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.median()" + ] + }, + { + "cell_type": "markdown", + "id": "ethical-fishing", + "metadata": {}, + "source": [ + "`mean` similarly for the mean:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "weekly-attack", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId 446.000000\n", + "Survived 0.383838\n", + "Pclass 2.308642\n", + "Age 29.699118\n", + "SibSp 0.523008\n", + "Parch 0.381594\n", + "Fare 32.204208\n", + "dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.mean()" + ] + }, + { + "cell_type": "markdown", + "id": "automatic-syntax", + "metadata": {}, + "source": [ + "`value_counts` is useful to count the number of occurrences of a value.
For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "accepting-gregory", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "male 577\n", + "female 314\n", + "Name: Sex, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.Sex.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "heavy-warner", + "metadata": {}, + "source": [ + "`max` and `min` to get the maximum and minimum:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "rough-confusion", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "80.0" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.Age.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "deluxe-veteran", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.42" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.Age.min()" + ] + }, + { + "cell_type": "markdown", + "id": "egyptian-booth", + "metadata": {}, + "source": [ + "## DataFrame manipulation" + ] + }, + { + "cell_type": "markdown", + "id": "noble-number", + "metadata": {}, + "source": [ + "### Renaming columns" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "amino-demographic", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " 
</tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C\n", + "0 0 1 2\n", + "1 3 4 5\n", + "2 6 7 8\n", + "3 9 10 11" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.arange(12).reshape(4,3),\n", + " columns=['A', 'B', 'C'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "surface-dimension", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B', 'Z'], dtype='object')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = list(df.columns)\n", + "cols[2] = 'Z'\n", + "df.columns = cols\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "southwest-corruption", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + 
" <th>1</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "0 0 1 2\n", + "1 3 4 5\n", + "2 6 7 8\n", + "3 9 10 11" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = ['X', 'Y', 'Z']\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "competitive-strap", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A Y Z\n", + "0 0 1 2\n", + "1 3 4 5\n", + "2 6 7 8\n", + "3 9 10 11" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={'X': 'A'})" + ] + }, + { + "cell_type": 
"markdown", + "id": "sonic-penalty", + "metadata": {}, + "source": [ + "### Rename index" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "annual-botswana", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>e</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "a 0 1 2\n", + "b 3 4 5\n", + "c 6 7 8\n", + "e 9 10 11" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index = ['a', 'b', 'c', 'e']\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "olive-master", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr 
style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>d</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "a 0 1 2\n", + "b 3 4 5\n", + "c 6 7 8\n", + "d 9 10 11" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(index={'e':'d'})" + ] + }, + { + "cell_type": "markdown", + "id": "coupled-encoding", + "metadata": {}, + "source": [ + "### Add column" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "optional-train", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " <th>id</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>400</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", 
+ " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>e</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>12</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z id\n", + "a 0 1 2 0\n", + "b 3 4 5 400\n", + "c 6 7 8 3\n", + "e 9 10 11 12" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['id'] = [0, 400, 3,12]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "neural-thought", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " <th>id</th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " <th>id</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>400</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>400</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " <td>3</td>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>e</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>12</td>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " 
<td>11</td>\n", + " <td>12</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z id X Y Z id\n", + "a 0 1 2 0 0 1 2 0\n", + "b 3 4 5 400 3 4 5 400\n", + "c 6 7 8 3 6 7 8 3\n", + "e 9 10 11 12 9 10 11 12" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df, df], axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "apart-permission", + "metadata": {}, + "source": [ + "### Set column as index\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "cathedral-bouquet", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " <tr>\n", + " <th>id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>400</th>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>7</td>\n", + " <td>6</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>10</td>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "id \n", + "0 1 0 2\n", + "400 4 3 5\n", + "3 7 6 8\n", + "12 10 9 11" + ] + }, + "execution_count": 48, +
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.set_index(\"id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "narrative-michael", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " <th>id</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>a</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>b</th>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " <td>5</td>\n", + " <td>400</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c</th>\n", + " <td>7</td>\n", + " <td>6</td>\n", + " <td>8</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>e</th>\n", + " <td>10</td>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " <td>12</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z id\n", + "a 1 0 2 0\n", + "b 4 3 5 400\n", + "c 7 6 8 3\n", + "e 10 9 11 12" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "elder-apache", + "metadata": {}, + "source": [ + "The `inplace` argument is present across different pandas methods in order to directly edit the object we are working on instead of creating a new object:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "western-commander", + "metadata": {}, + "outputs": [], + "source": [ + "df.set_index(\"id\", inplace=True)" + ] +
}, + { + "cell_type": "code", + "execution_count": 56, + "id": "sorted-western", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " <tr>\n", + " <th>id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>400</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "id \n", + "0 0 1 2\n", + "400 3 4 5\n", + "3 6 7 8\n", + "12 9 10 11" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "continent-garbage", + "metadata": {}, + "source": [ + "### Add row\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.append.html\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.concat.html" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "bigger-cartridge", + "metadata": {}, + "outputs": [], + "source": [ + "rows = pd.DataFrame([[30, 31, 32], [42, 43, 44]], columns=['X', 'Y', 'Z']) " + ] + }, + { + "cell_type": "markdown", +
"id": "private-soviet", + "metadata": {}, + "source": [ + "Notice here from the documentation that we are using the default `axis=0` (ie a concatenation along rows)." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "helpful-venezuela", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>400</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>30</td>\n", + " <td>31</td>\n", + " <td>32</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>42</td>\n", + " <td>43</td>\n", + " <td>44</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "0 0 1 2\n", + "400 3 4 5\n", + "3 6 7 8\n", + "12 9 10 11\n", + "0 30 31 32\n", + "1 42 43 44" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df, rows])" + ] + }, + { + "cell_type": "markdown", + "id": "representative-silicon", + "metadata": {}, + "source": [ + "You can also choose to `ignore_index`, similar to resetting and dropping the indices (but note that the
index values on the other axes are still respected in the join):" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "single-angel", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>Y</th>\n", + " <th>Z</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>6</td>\n", + " <td>7</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X Y Z\n", + "0 0 1 2\n", + "1 3 4 5\n", + "2 6 7 8\n", + "3 9 10 11\n", + "4 0 1 2\n", + "5 3 4 5\n", + "6 6 7 8\n", + "7 9 10 11" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df, df], ignore_index=True)" + ] + }, + { + "cell_type": "markdown", + "id": "residential-jewel", + 
"metadata": {}, + "source": [ + "## Filtering tables\n", + "\n", + "> https://pandas.pydata.org/docs/user_guide/indexing.html#indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "dominican-vitamin", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 
3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 
3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.head()" + ] + }, + { + "cell_type": "markdown", + "id": "brown-alberta", + "metadata": {}, + "source": [ + "### Selecting columns" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "rental-airfare", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 male\n", + "1 female\n", + "2 female\n", + "3 female\n", + "4 male\n", + "Name: Sex, dtype: object" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic['Sex'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "adapted-vitamin", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>Pclass</th>\n", + " <th>Survived</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " 
<td>male</td>\n", + " <td>35.0</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Sex Age Pclass Survived\n", + "0 male 22.0 3 0\n", + "1 female 38.0 1 1\n", + "2 female 26.0 3 1\n", + "3 female 35.0 1 1\n", + "4 male 35.0 3 0" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic[['Sex', 'Age', 'Pclass', 'Survived']].head()" + ] + }, + { + "cell_type": "markdown", + "id": "incorrect-material", + "metadata": {}, + "source": [ + "### Selecting on a condition" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "historic-headset", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. 
Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>7</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>McCarthy, Mr. Timothy J</td>\n", + " <td>male</td>\n", + " <td>54.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>17463</td>\n", + " <td>51.8625</td>\n", + " <td>E46</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "6 7 0 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 
3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S \n", + "6 0 17463 51.8625 E46 S " + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic[titanic['Age'] > 25].head()" + ] + }, + { + "cell_type": "markdown", + "id": "covered-beads", + "metadata": {}, + "source": [ + "### Indexing/Slicing\n", + "\n", + "> https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html\n", + "\n", + "As for pandas Series, you can use `loc` (with labels or booleans) and `iloc` (with integers) for indexing/slicing.\n", + "The first argument represents rows and the second represents columns.\n", + "\n", + "Both methods use the same syntax as numpy indexing/slicing, but note that `loc` slices include the end label whereas `iloc` slices exclude the end position" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "conscious-consistency", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Sex Age\n", + "1 female 38.0\n", + "2 female 26.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.loc[[1,2], ['Sex', 'Age']]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "automated-large", + "metadata": {}, + "outputs": [ + { +
"data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Sex Age SibSp Parch Ticket\n", + "1 female 38.0 1 0 PC 17599\n", + "2 female 26.0 0 0 STON/O2. 
3101282\n", + "3 female 35.0 1 0 113803\n", + "4 male 35.0 0 0 373450" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.loc[1:4, 'Sex':'Ticket'] # Ticket column is included" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "planned-prescription", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Sex Age\n", + "0 male 22.0\n", + "1 female 38.0" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.iloc[[0,1], [4, 5]]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "gothic-aluminum", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " 
<th>Parch</th>\n", + " <th>Ticket</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Sex Age SibSp Parch Ticket\n", + "0 male 22.0 1 0 A/5 21171\n", + "1 female 38.0 1 0 PC 17599\n", + "2 female 26.0 0 0 STON/O2. 3101282" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.iloc[0:3, 4:9] # the column at position 9 is excluded" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "stone-drill", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>Age</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>183</th>\n", + " <td>Becker, Master. Richard F</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>248</th>\n", + " <td>Beckwith, Mr. Richard Leonard</td>\n", + " <td>37.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>618</th>\n", + " <td>Becker, Miss. Marion Louise</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>871</th>\n", + " <td>Beckwith, Mrs.
Richard Leonard (Sallie Monypeny)</td>\n", + " <td>47.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name Age\n", + "183 Becker, Master. Richard F 1.0\n", + "248 Beckwith, Mr. Richard Leonard 37.0\n", + "618 Becker, Miss. Marion Louise 4.0\n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) 47.0" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask = titanic['Name'].str.contains('^Bec')\n", + "titanic[mask][['Name', 'Age']]" + ] + }, + { + "cell_type": "markdown", + "id": "subject-campbell", + "metadata": {}, + "source": [ + "### Selecting random samples\n", + "\n", + "> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "duplicate-branch", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>830</th>\n", + " <td>831</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Yasbeck, Mrs. 
Antoni (Selini Alexander)</td>\n", + " <td>female</td>\n", + " <td>15.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2659</td>\n", + " <td>14.4542</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>141</th>\n", + " <td>142</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Nysten, Miss. Anna Sofia</td>\n", + " <td>female</td>\n", + " <td>22.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>347081</td>\n", + " <td>7.7500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>378</th>\n", + " <td>379</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Betros, Mr. Tannous</td>\n", + " <td>male</td>\n", + " <td>20.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2648</td>\n", + " <td>4.0125</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>584</th>\n", + " <td>585</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Paulner, Mr. Uscher</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>3411</td>\n", + " <td>8.7125</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>820</th>\n", + " <td>821</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Hays, Mrs. Charles Melville (Clara Jennings Gr...</td>\n", + " <td>female</td>\n", + " <td>52.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>12749</td>\n", + " <td>93.5000</td>\n", + " <td>B69</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>497</th>\n", + " <td>498</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Shellard, Mr. Frederick William</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>C.A. 
6212</td>\n", + " <td>15.1000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "830 831 1 3 \n", + "141 142 1 3 \n", + "378 379 0 3 \n", + "584 585 0 3 \n", + "820 821 1 1 \n", + "497 498 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "830 Yasbeck, Mrs. Antoni (Selini Alexander) female 15.0 1 \n", + "141 Nysten, Miss. Anna Sofia female 22.0 0 \n", + "378 Betros, Mr. Tannous male 20.0 0 \n", + "584 Paulner, Mr. Uscher male NaN 0 \n", + "820 Hays, Mrs. Charles Melville (Clara Jennings Gr... female 52.0 1 \n", + "497 Shellard, Mr. Frederick William male NaN 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "830 0 2659 14.4542 NaN C \n", + "141 0 347081 7.7500 NaN S \n", + "378 0 2648 4.0125 NaN C \n", + "584 0 3411 8.7125 NaN C \n", + "820 1 12749 93.5000 B69 S \n", + "497 0 C.A. 6212 15.1000 NaN S " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.sample(n=6)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "southwest-lighting", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>111</th>\n", + " <td>112</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Zabour, Miss. Hileni</td>\n", + " <td>female</td>\n", + " <td>14.5</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2665</td>\n", + " <td>14.4542</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>211</th>\n", + " <td>212</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Cameron, Miss. Clear Annie</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>F.C.C. 13528</td>\n", + " <td>21.0000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>264</th>\n", + " <td>265</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Henry, Miss. Delia</td>\n", + " <td>female</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>382649</td>\n", + " <td>7.7500</td>\n", + " <td>NaN</td>\n", + " <td>Q</td>\n", + " </tr>\n", + " <tr>\n", + " <th>363</th>\n", + " <td>364</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Asim, Mr. Adola</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>SOTON/O.Q. 3101310</td>\n", + " <td>7.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name Sex Age \\\n", + "111 112 0 3 Zabour, Miss. Hileni female 14.5 \n", + "211 212 1 2 Cameron, Miss. Clear Annie female 35.0 \n", + "264 265 0 3 Henry, Miss. Delia female NaN \n", + "363 364 0 3 Asim, Mr. Adola male 35.0 \n", + "\n", + " SibSp Parch Ticket Fare Cabin Embarked \n", + "111 1 0 2665 14.4542 NaN C \n", + "211 0 0 F.C.C. 13528 21.0000 NaN S \n", + "264 0 0 382649 7.7500 NaN Q \n", + "363 0 0 SOTON/O.Q. 
3101310 7.0500 NaN S " + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.sample(frac=.005)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "complex-transcript", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>753</th>\n", + " <td>754</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Jonkoff, Mr. Lalio</td>\n", + " <td>male</td>\n", + " <td>23.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>349204</td>\n", + " <td>7.8958</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>558</th>\n", + " <td>559</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Taussig, Mrs. Emil (Tillie Mandelbaum)</td>\n", + " <td>female</td>\n", + " <td>39.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>110413</td>\n", + " <td>79.6500</td>\n", + " <td>E67</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>374</th>\n", + " <td>375</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Palsson, Miss. 
Stina Viola</td>\n", + " <td>female</td>\n", + " <td>3.0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>349909</td>\n", + " <td>21.0750</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>61</th>\n", + " <td>62</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Icard, Miss. Amelie</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>113572</td>\n", + " <td>80.0000</td>\n", + " <td>B28</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name \\\n", + "753 754 0 3 Jonkoff, Mr. Lalio \n", + "558 559 1 1 Taussig, Mrs. Emil (Tillie Mandelbaum) \n", + "374 375 0 3 Palsson, Miss. Stina Viola \n", + "61 62 1 1 Icard, Miss. Amelie \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "753 male 23.0 0 0 349204 7.8958 NaN S \n", + "558 female 39.0 1 1 110413 79.6500 E67 S \n", + "374 female 3.0 3 1 349909 21.0750 NaN S \n", + "61 female 38.0 0 0 113572 80.0000 B28 NaN " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.sample(frac=.005)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "regulated-ontario", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + 
" <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>456</th>\n", + " <td>457</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Millet, Mr. Francis Davis</td>\n", + " <td>male</td>\n", + " <td>65.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>13509</td>\n", + " <td>26.550</td>\n", + " <td>E38</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>351</th>\n", + " <td>352</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Williams-Lambert, Mr. Fletcher Fellows</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>113510</td>\n", + " <td>35.000</td>\n", + " <td>C128</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Sivola, Mr. Antti Wilhelm</td>\n", + " <td>male</td>\n", + " <td>21.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O 2. 3101280</td>\n", + " <td>7.925</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>671</th>\n", + " <td>672</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Davidson, Mr. Thornton</td>\n", + " <td>male</td>\n", + " <td>31.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>F.C. 12750</td>\n", + " <td>52.000</td>\n", + " <td>B71</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name \\\n", + "456 457 0 1 Millet, Mr. Francis Davis \n", + "351 352 0 1 Williams-Lambert, Mr. Fletcher Fellows \n", + "173 174 0 3 Sivola, Mr. Antti Wilhelm \n", + "671 672 0 1 Davidson, Mr. Thornton \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "456 male 65.0 0 0 13509 26.550 E38 S \n", + "351 male NaN 0 0 113510 35.000 C128 S \n", + "173 male 21.0 0 0 STON/O 2. 3101280 7.925 NaN S \n", + "671 male 31.0 1 0 F.C. 
12750 52.000 B71 S " + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.sample(frac=.005, random_state=12)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "fifty-cutting", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>456</th>\n", + " <td>457</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Millet, Mr. Francis Davis</td>\n", + " <td>male</td>\n", + " <td>65.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>13509</td>\n", + " <td>26.550</td>\n", + " <td>E38</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>351</th>\n", + " <td>352</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Williams-Lambert, Mr. Fletcher Fellows</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>113510</td>\n", + " <td>35.000</td>\n", + " <td>C128</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Sivola, Mr. Antti Wilhelm</td>\n", + " <td>male</td>\n", + " <td>21.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O 2. 
3101280</td>\n", + " <td>7.925</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>671</th>\n", + " <td>672</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Davidson, Mr. Thornton</td>\n", + " <td>male</td>\n", + " <td>31.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>F.C. 12750</td>\n", + " <td>52.000</td>\n", + " <td>B71</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name \\\n", + "456 457 0 1 Millet, Mr. Francis Davis \n", + "351 352 0 1 Williams-Lambert, Mr. Fletcher Fellows \n", + "173 174 0 3 Sivola, Mr. Antti Wilhelm \n", + "671 672 0 1 Davidson, Mr. Thornton \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "456 male 65.0 0 0 13509 26.550 E38 S \n", + "351 male NaN 0 0 113510 35.000 C128 S \n", + "173 male 21.0 0 0 STON/O 2. 3101280 7.925 NaN S \n", + "671 male 31.0 1 0 F.C. 12750 52.000 B71 S " + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.sample(frac=.005, random_state=12)" + ] + }, + { + "cell_type": "markdown", + "id": "introductory-domestic", + "metadata": {}, + "source": [ + "### isin\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isin.html" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "fuzzy-nepal", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " 
<th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Sivola, Mr. Antti Wilhelm</td>\n", + " <td>male</td>\n", + " <td>21.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O 2. 3101280</td>\n", + " <td>7.925</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>351</th>\n", + " <td>352</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Williams-Lambert, Mr. Fletcher Fellows</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>113510</td>\n", + " <td>35.000</td>\n", + " <td>C128</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>456</th>\n", + " <td>457</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>Millet, Mr. Francis Davis</td>\n", + " <td>male</td>\n", + " <td>65.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>13509</td>\n", + " <td>26.550</td>\n", + " <td>E38</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass Name \\\n", + "173 174 0 3 Sivola, Mr. Antti Wilhelm \n", + "351 352 0 1 Williams-Lambert, Mr. Fletcher Fellows \n", + "456 457 0 1 Millet, Mr. Francis Davis \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "173 male 21.0 0 0 STON/O 2. 
3101280 7.925 NaN S \n", + "351 male NaN 0 0 113510 35.000 C128 S \n", + "456 male 65.0 0 0 13509 26.550 E38 S " + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic[titanic['PassengerId'].isin([457, 352, 174])]" + ] + }, + { + "cell_type": "markdown", + "id": "tested-stretch", + "metadata": {}, + "source": [ + "### where\n", + "\n", + "Where cond is **True**, **keep the original** value.<br />\n", + "Where **False**, **replace** with corresponding value from other.\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.where.html" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "persistent-processor", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-4</td>\n", + " <td>-3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-2</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 -4 -3\n", + "1 -2 -1\n", + "2 0 1\n", + "3 2 3\n", + "4 4 5" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = 
pd.DataFrame(np.arange(-4, 6).reshape(-1, 2), columns=['A', 'B'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "informative-accident", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-4</td>\n", + " <td>-3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-2</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 -4 -3\n", + "1 -2 -1\n", + "2 0 0\n", + "3 0 0\n", + "4 0 0" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.where(df < 0 , 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "usual-soundtrack", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + 
" <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 4 3\n", + "1 2 1\n", + "2 0 1\n", + "3 2 3\n", + "4 4 5" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.where(df > 0 , -df)" + ] + }, + { + "cell_type": "markdown", + "id": "sudden-biography", + "metadata": {}, + "source": [ + "### mask\n", + "\n", + "Replace values where the condition is True (the opposite of `where`)\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.where.html" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "informative-bahamas", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + 
" <tr>\n", + " <th>3</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 0 0\n", + "1 0 0\n", + "2 0 1\n", + "3 2 3\n", + "4 4 5" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mask(df < 0, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "minute-marsh", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B\n", + "0 4 3\n", + "1 2 1\n", + "2 0 1\n", + "3 2 3\n", + "4 4 5" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mask(df < 0, -df)" + ] + }, + { + "cell_type": "markdown", + "id": "green-creator", + "metadata": {}, + "source": [ + "### query\n", + "\n", + "Query the columns of a DataFrame with a boolean expression.\n", + "\n", + "> 
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "listed-blackberry", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. 
Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n", + " <td>female</td>\n", + " <td>27.0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>347742</td>\n", + " <td>11.1333</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n", + " <td>female</td>\n", + " <td>14.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>237736</td>\n", + " <td>30.0708</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>875</th>\n", + " <td>876</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Najib, Miss. Adele Kiamie \"Jane\"</td>\n", + " <td>female</td>\n", + " <td>15.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2667</td>\n", + " <td>7.2250</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>879</th>\n", + " <td>880</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)</td>\n", + " <td>female</td>\n", + " <td>56.0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>11767</td>\n", + " <td>83.1583</td>\n", + " <td>C50</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>880</th>\n", + " <td>881</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Shelley, Mrs. 
William (Imanita Parrish Hall)</td>\n", + " <td>female</td>\n", + " <td>25.0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>230433</td>\n", + " <td>26.0000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>887</th>\n", + " <td>888</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Graham, Miss. Margaret Edith</td>\n", + " <td>female</td>\n", + " <td>19.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>112053</td>\n", + " <td>30.0000</td>\n", + " <td>B42</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>889</th>\n", + " <td>890</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Behr, Mr. Karl Howell</td>\n", + " <td>male</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>111369</td>\n", + " <td>30.0000</td>\n", + " <td>C148</td>\n", + " <td>C</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>342 rows × 12 columns</p>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + ".. ... ... ... \n", + "875 876 1 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "887 888 1 1 \n", + "889 890 1 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + ".. ... ... ... ... \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "889 Behr, Mr. 
Karl Howell male 26.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "8 2 347742 11.1333 NaN S \n", + "9 0 237736 30.0708 NaN C \n", + ".. ... ... ... ... ... \n", + "875 0 2667 7.2250 NaN C \n", + "879 1 11767 83.1583 C50 C \n", + "880 1 230433 26.0000 NaN S \n", + "887 0 112053 30.0000 B42 S \n", + "889 0 111369 30.0000 C148 C \n", + "\n", + "[342 rows x 12 columns]" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.query(\"Survived == 1\")" + ] + }, + { + "cell_type": "markdown", + "id": "infinite-bankruptcy", + "metadata": {}, + "source": [ + "Composing with \"and\" (`&`) \"or\" (`|`) operators:" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "compressed-footage", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. 
John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n", + " <td>female</td>\n", + " <td>27.0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>347742</td>\n", + " <td>11.1333</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n", + " <td>female</td>\n", + " <td>14.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>237736</td>\n", + " <td>30.0708</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>874</th>\n", + " <td>875</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Abelson, Mrs. 
Samuel (Hannah Wizosky)</td>\n", + " <td>female</td>\n", + " <td>28.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>P/PP 3381</td>\n", + " <td>24.0000</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>875</th>\n", + " <td>876</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Najib, Miss. Adele Kiamie \"Jane\"</td>\n", + " <td>female</td>\n", + " <td>15.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2667</td>\n", + " <td>7.2250</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>879</th>\n", + " <td>880</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)</td>\n", + " <td>female</td>\n", + " <td>56.0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>11767</td>\n", + " <td>83.1583</td>\n", + " <td>C50</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>880</th>\n", + " <td>881</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Shelley, Mrs. William (Imanita Parrish Hall)</td>\n", + " <td>female</td>\n", + " <td>25.0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>230433</td>\n", + " <td>26.0000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>887</th>\n", + " <td>888</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Graham, Miss. Margaret Edith</td>\n", + " <td>female</td>\n", + " <td>19.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>112053</td>\n", + " <td>30.0000</td>\n", + " <td>B42</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>233 rows × 12 columns</p>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + ".. ... ... ... \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "887 888 1 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "1 Cumings, Mrs. 
John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + ".. ... ... ... ... \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "8 2 347742 11.1333 NaN S \n", + "9 0 237736 30.0708 NaN C \n", + ".. ... ... ... ... ... \n", + "874 0 P/PP 3381 24.0000 NaN C \n", + "875 0 2667 7.2250 NaN C \n", + "879 1 11767 83.1583 C50 C \n", + "880 1 230433 26.0000 NaN S \n", + "887 0 112053 30.0000 B42 S \n", + "\n", + "[233 rows x 12 columns]" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.query(\"Survived == 1 & Sex == 'female'\")" + ] + }, + { + "cell_type": "markdown", + "id": "exterior-workstation", + "metadata": {}, + "source": [ + "You can refer to variables in the environment by prefixing them with an ‘@’ character " + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "removable-gather", + "metadata": {}, + "outputs": [], + "source": [ + "vips = titanic.Name.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "fleet-modeling", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "79 Dowdell, Miss. Elizabeth\n", + "354 Yousif, Mr. Wazli\n", + "495 Yousseff, Mr. Gerious\n", + "173 Sivola, Mr. Antti Wilhelm\n", + "615 Herman, Miss. Alice\n", + "614 Brocklebank, Mr. 
William Alfred\n", + "735 Williams, Mr. Leslie\n", + "666 Butler, Mr. Reginald Fenton\n", + "617 Lobb, Mrs. William Arthur (Cordelia K Stanlick)\n", + "839 Marechal, Mr. Pierre\n", + "Name: Name, dtype: object" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vips" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "opposite-score", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>79</th>\n", + " <td>80</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Dowdell, Miss. Elizabeth</td>\n", + " <td>female</td>\n", + " <td>30.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>364516</td>\n", + " <td>12.4750</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Sivola, Mr. Antti Wilhelm</td>\n", + " <td>male</td>\n", + " <td>21.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O 2. 3101280</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>354</th>\n", + " <td>355</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Yousif, Mr. 
Wazli</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2647</td>\n", + " <td>7.2250</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>495</th>\n", + " <td>496</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Yousseff, Mr. Gerious</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2627</td>\n", + " <td>14.4583</td>\n", + " <td>NaN</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>614</th>\n", + " <td>615</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Brocklebank, Mr. William Alfred</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>364512</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>615</th>\n", + " <td>616</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>Herman, Miss. Alice</td>\n", + " <td>female</td>\n", + " <td>24.0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>220845</td>\n", + " <td>65.0000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>617</th>\n", + " <td>618</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Lobb, Mrs. William Arthur (Cordelia K Stanlick)</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5. 3336</td>\n", + " <td>16.1000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>666</th>\n", + " <td>667</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>Butler, Mr. Reginald Fenton</td>\n", + " <td>male</td>\n", + " <td>25.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>234686</td>\n", + " <td>13.0000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>735</th>\n", + " <td>736</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Williams, Mr. 
Leslie</td>\n", + " <td>male</td>\n", + " <td>28.5</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>54636</td>\n", + " <td>16.1000</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>839</th>\n", + " <td>840</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Marechal, Mr. Pierre</td>\n", + " <td>male</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>11774</td>\n", + " <td>29.7000</td>\n", + " <td>C47</td>\n", + " <td>C</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "79 80 1 3 \n", + "173 174 0 3 \n", + "354 355 0 3 \n", + "495 496 0 3 \n", + "614 615 0 3 \n", + "615 616 1 2 \n", + "617 618 0 3 \n", + "666 667 0 2 \n", + "735 736 0 3 \n", + "839 840 1 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "79 Dowdell, Miss. Elizabeth female 30.0 0 \n", + "173 Sivola, Mr. Antti Wilhelm male 21.0 0 \n", + "354 Yousif, Mr. Wazli male NaN 0 \n", + "495 Yousseff, Mr. Gerious male NaN 0 \n", + "614 Brocklebank, Mr. William Alfred male 35.0 0 \n", + "615 Herman, Miss. Alice female 24.0 1 \n", + "617 Lobb, Mrs. William Arthur (Cordelia K Stanlick) female 26.0 1 \n", + "666 Butler, Mr. Reginald Fenton male 25.0 0 \n", + "735 Williams, Mr. Leslie male 28.5 0 \n", + "839 Marechal, Mr. Pierre male NaN 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "79 0 364516 12.4750 NaN S \n", + "173 0 STON/O 2. 3101280 7.9250 NaN S \n", + "354 0 2647 7.2250 NaN C \n", + "495 0 2627 14.4583 NaN C \n", + "614 0 364512 8.0500 NaN S \n", + "615 2 220845 65.0000 NaN S \n", + "617 0 A/5. 
3336 16.1000 NaN S \n", + "666 0 234686 13.0000 NaN S \n", + "735 0 54636 16.1000 NaN S \n", + "839 0 11774 29.7000 C47 C " + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.query(\"Name in @vips\")" + ] + }, + { + "cell_type": "markdown", + "id": "apart-glossary", + "metadata": {}, + "source": [ + "### drop_duplicates\n", + "\n", + "Return DataFrame with duplicate rows removed.\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "optional-surfing", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>brand</th>\n", + " <th>style</th>\n", + " <th>rating</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Yum Yum</td>\n", + " <td>cup</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Yum Yum</td>\n", + " <td>cup</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Indomie</td>\n", + " <td>cup</td>\n", + " <td>3.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Indomie</td>\n", + " <td>pack</td>\n", + " <td>15.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Indomie</td>\n", + " <td>pack</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " brand style rating\n", + "0 Yum Yum cup 4.0\n", + "1 Yum Yum cup 4.0\n", + "2 Indomie cup 3.5\n", + "3 Indomie pack 
15.0\n", + "4 Indomie pack 5.0" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],\n", + " 'style': ['cup', 'cup', 'cup', 'pack', 'pack'],\n", + " 'rating': [4, 4, 3.5, 15, 5]\n", + "})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "bulgarian-improvement", + "metadata": {}, + "source": [ + "By default, it removes duplicate rows based on all columns:" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "becoming-carbon", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>brand</th>\n", + " <th>style</th>\n", + " <th>rating</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Yum Yum</td>\n", + " <td>cup</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Indomie</td>\n", + " <td>cup</td>\n", + " <td>3.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Indomie</td>\n", + " <td>pack</td>\n", + " <td>15.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Indomie</td>\n", + " <td>pack</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " brand style rating\n", + "0 Yum Yum cup 4.0\n", + "2 Indomie cup 3.5\n", + "3 Indomie pack 15.0\n", + "4 Indomie pack 5.0" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop_duplicates()" + ] 
+ }, + { + "cell_type": "markdown", + "id": "supreme-master", + "metadata": {}, + "source": [ + "To remove duplicates on specific column(s), use subset:" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "social-pottery", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>brand</th>\n", + " <th>style</th>\n", + " <th>rating</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Yum Yum</td>\n", + " <td>cup</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Indomie</td>\n", + " <td>cup</td>\n", + " <td>3.5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " brand style rating\n", + "0 Yum Yum cup 4.0\n", + "2 Indomie cup 3.5" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop_duplicates(subset=['brand'])" + ] + }, + { + "cell_type": "markdown", + "id": "guided-feeling", + "metadata": {}, + "source": [ + "To remove duplicates and keep last occurrences, use keep:" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "incoming-equipment", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>brand</th>\n", + " <th>style</th>\n", + " <th>rating</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Yum Yum</td>\n", + " <td>cup</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Indomie</td>\n", + " <td>cup</td>\n", + " <td>3.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Indomie</td>\n", + " <td>pack</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " brand style rating\n", + "1 Yum Yum cup 4.0\n", + "2 Indomie cup 3.5\n", + "4 Indomie pack 5.0" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop_duplicates(subset=['brand', 'style'], keep='last')" + ] + }, + { + "cell_type": "markdown", + "id": "vulnerable-hartford", + "metadata": {}, + "source": [ + "## Group data" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "verified-conservative", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "##################### 1 #########################\n", + " PassengerId Survived Pclass Sex Age\n", + "1 2 1 1 female 38.0\n", + "3 4 1 1 female 35.0\n", + "6 7 0 1 male 54.0\n", + "11 12 1 1 female 58.0\n", + "23 24 1 1 male 28.0\n", + ".. ... ... ... ... ...\n", + "871 872 1 1 female 47.0\n", + "872 873 0 1 male 33.0\n", + "879 880 1 1 female 56.0\n", + "887 888 1 1 female 19.0\n", + "889 890 1 1 male 26.0\n", + "\n", + "[216 rows x 5 columns]\n", + "##################### 2 #########################\n", + " PassengerId Survived Pclass Sex Age\n", + "9 10 1 2 female 14.0\n", + "15 16 1 2 female 55.0\n", + "17 18 1 2 male NaN\n", + "20 21 0 2 male 35.0\n", + "21 22 1 2 male 34.0\n", + ".. ... ... ... ... 
...\n", + "866 867 1 2 female 27.0\n", + "874 875 1 2 female 28.0\n", + "880 881 1 2 female 25.0\n", + "883 884 0 2 male 28.0\n", + "886 887 0 2 male 27.0\n", + "\n", + "[184 rows x 5 columns]\n", + "##################### 3 #########################\n", + " PassengerId Survived Pclass Sex Age\n", + "0 1 0 3 male 22.0\n", + "2 3 1 3 female 26.0\n", + "4 5 0 3 male 35.0\n", + "5 6 0 3 male NaN\n", + "7 8 0 3 male 2.0\n", + ".. ... ... ... ... ...\n", + "882 883 0 3 female 22.0\n", + "884 885 0 3 male 25.0\n", + "885 886 0 3 female 39.0\n", + "888 889 0 3 female NaN\n", + "890 891 0 3 male 32.0\n", + "\n", + "[491 rows x 5 columns]\n" + ] + } + ], + "source": [ + "for p_class, df in titanic.groupby('Pclass'):\n", + " print(f\"##################### {p_class} #########################\")\n", + " print(df[['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age']])" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "alternate-pepper", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "##################### 1 #########################\n", + "================== female =================\n", + " Survived Age\n", + "count 94.000000 85.000000\n", + "mean 0.968085 34.611765\n", + "std 0.176716 13.612052\n", + "min 0.000000 2.000000\n", + "25% 1.000000 23.000000\n", + "50% 1.000000 35.000000\n", + "75% 1.000000 44.000000\n", + "max 1.000000 63.000000\n", + "================== male =================\n", + " Survived Age\n", + "count 122.000000 101.000000\n", + "mean 0.368852 41.281386\n", + "std 0.484484 15.139570\n", + "min 0.000000 0.920000\n", + "25% 0.000000 30.000000\n", + "50% 0.000000 40.000000\n", + "75% 1.000000 51.000000\n", + "max 1.000000 80.000000\n", + "##################### 2 #########################\n", + "================== female =================\n", + " Survived Age\n", + "count 76.000000 74.000000\n", + "mean 0.921053 28.722973\n", + "std 0.271448 12.872702\n", + "min 0.000000 2.000000\n", + "25% 
1.000000 22.250000\n", + "50% 1.000000 28.000000\n", + "75% 1.000000 36.000000\n", + "max 1.000000 57.000000\n", + "================== male =================\n", + " Survived Age\n", + "count 108.000000 99.000000\n", + "mean 0.157407 30.740707\n", + "std 0.365882 14.793894\n", + "min 0.000000 0.670000\n", + "25% 0.000000 23.000000\n", + "50% 0.000000 30.000000\n", + "75% 0.000000 36.750000\n", + "max 1.000000 70.000000\n", + "##################### 3 #########################\n", + "================== female =================\n", + " Survived Age\n", + "count 144.000000 102.000000\n", + "mean 0.500000 21.750000\n", + "std 0.501745 12.729964\n", + "min 0.000000 0.750000\n", + "25% 0.000000 14.125000\n", + "50% 0.500000 21.500000\n", + "75% 1.000000 29.750000\n", + "max 1.000000 63.000000\n", + "================== male =================\n", + " Survived Age\n", + "count 347.000000 253.000000\n", + "mean 0.135447 26.507589\n", + "std 0.342694 12.159514\n", + "min 0.000000 0.420000\n", + "25% 0.000000 20.000000\n", + "50% 0.000000 25.000000\n", + "75% 0.000000 33.000000\n", + "max 1.000000 74.000000\n" + ] + } + ], + "source": [ + "for p_class, df in titanic.groupby('Pclass'):\n", + " print(f\"##################### {p_class} #########################\")\n", + " for sex, df2 in df.groupby('Sex'):\n", + " print(f\"================== {sex} =================\")\n", + " print(df2[['Survived', 'Age']].describe())" + ] + }, + { + "cell_type": "markdown", + "id": "toxic-madagascar", + "metadata": {}, + "source": [ + "## Table Concatenation/Merging\n", + "\n", + "> https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html\n", + "> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.merge.html" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "assumed-driving", + "metadata": {}, + "outputs": [], + "source": [ + "table_1 = pd.DataFrame({'gene_ID':[1,12,3],\n", + " 'species': ['HUMAN', 'RAT', 'HORSE']})\n", + "table_2 = 
pd.DataFrame({'gene_ID':[12,3,1],\n", + " 'effect': [12, 33, 45]})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "artificial-senegal", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>species</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID species\n", + "0 1 HUMAN\n", + "1 12 RAT\n", + "2 3 HORSE" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_1" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "adjustable-hamburg", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " 
<td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID effect\n", + "0 12 12\n", + "1 3 33\n", + "2 1 45" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_2" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "focal-wrist", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>species</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " <td>33</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID species effect\n", + "0 1 HUMAN 45\n", + "1 12 RAT 12\n", + "2 3 HORSE 33" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(table_1, table_2, on='gene_ID')" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "loved-raise", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type 
{\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ref effect\n", + "0 12 12\n", + "1 3 33\n", + "2 1 45" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_3 = pd.DataFrame({'ref':[12,3,1],\n", + " 'effect': [12, 33, 45]})\n", + "table_3" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "finished-profile", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>species</th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " <td>12</td>\n", + " <td>12</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID species ref effect\n", + "0 1 HUMAN 1 45\n", + "1 12 RAT 12 12\n", + "2 3 HORSE 3 33" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(table_1, table_3, left_on='gene_ID', right_on='ref')" + ] + }, + { + "cell_type": "markdown", + "id": "digital-blowing", + "metadata": {}, + "source": [ + "### Effect of *how* parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "olive-punch", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>specie</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>42</td>\n", + " <td>MONKEY</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID specie\n", + "0 1 HUMAN\n", + "1 12 RAT\n", + "2 3 HORSE\n", + "3 42 MONKEY" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_4 = pd.DataFrame({'gene_ID':[1,12,3, 42],\n", + " 'specie': ['HUMAN', 'RAT', 'HORSE', 'MONKEY']})\n", + 
"table_4" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "attached-jimmy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>35</td>\n", + " <td>100</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ref effect\n", + "0 12 12\n", + "1 3 33\n", + "2 1 45\n", + "3 35 100" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_5 = pd.DataFrame({'ref':[12,3,1, 35],\n", + " 'effect': [12, 33, 45, 100]})\n", + "table_5" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "charged-tragedy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>specie</th>\n", + " 
<th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " <td>1.0</td>\n", + " <td>45.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " <td>12.0</td>\n", + " <td>12.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " <td>3.0</td>\n", + " <td>33.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>42</td>\n", + " <td>MONKEY</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID specie ref effect\n", + "0 1 HUMAN 1.0 45.0\n", + "1 12 RAT 12.0 12.0\n", + "2 3 HORSE 3.0 33.0\n", + "3 42 MONKEY NaN NaN" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(table_4, table_5, left_on='gene_ID', right_on='ref', how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "encouraging-speaking", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>specie</th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12.0</td>\n", + " <td>RAT</td>\n", + " <td>12</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3.0</td>\n", + " <td>HORSE</td>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>2</th>\n", + " <td>1.0</td>\n", + " <td>HUMAN</td>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>35</td>\n", + " <td>100</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID specie ref effect\n", + "0 12.0 RAT 12 12\n", + "1 3.0 HORSE 3 33\n", + "2 1.0 HUMAN 1 45\n", + "3 NaN NaN 35 100" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(table_4, table_5, left_on='gene_ID', right_on='ref', how='right')" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "acquired-magnitude", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>specie</th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>HUMAN</td>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>RAT</td>\n", + " <td>12</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>HORSE</td>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID specie ref effect\n", + "0 1 HUMAN 1 45\n", + "1 12 RAT 12 12\n", + "2 3 HORSE 3 33" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"pd.merge(table_4, table_5, left_on='gene_ID', right_on='ref', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "imported-candle", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>gene_ID</th>\n", + " <th>specie</th>\n", + " <th>ref</th>\n", + " <th>effect</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1.0</td>\n", + " <td>HUMAN</td>\n", + " <td>1.0</td>\n", + " <td>45.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12.0</td>\n", + " <td>RAT</td>\n", + " <td>12.0</td>\n", + " <td>12.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3.0</td>\n", + " <td>HORSE</td>\n", + " <td>3.0</td>\n", + " <td>33.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>42.0</td>\n", + " <td>MONKEY</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>35.0</td>\n", + " <td>100.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " gene_ID specie ref effect\n", + "0 1.0 HUMAN 1.0 45.0\n", + "1 12.0 RAT 12.0 12.0\n", + "2 3.0 HORSE 3.0 33.0\n", + "3 42.0 MONKEY NaN NaN\n", + "4 NaN NaN 35.0 100.0" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(table_4, table_5, left_on='gene_ID', right_on='ref', how='outer')" + ] + }, + { + "cell_type": "markdown", + "id": "looking-price", + "metadata": {}, + "source": [ + "## 
Crosstab\n", + "\n", + "Compute a simple cross tabulation of two (or more) factors. By default, a frequency table of the factors is computed.\n", + "\n", + "> https://pandas.pydata.org/docs/reference/api/pandas.crosstab.html" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "determined-compromise", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>Pclass</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Age</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0.42</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0.67</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0.75</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0.83</th>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0.92</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>70.00</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>70.50</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>71.00</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + "
<th>74.00</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>80.00</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>88 rows × 3 columns</p>\n", + "</div>" + ], + "text/plain": [ + "Pclass 1 2 3\n", + "Age \n", + "0.42 0 0 1\n", + "0.67 0 1 0\n", + "0.75 0 0 2\n", + "0.83 0 2 0\n", + "0.92 1 0 0\n", + "... .. .. ..\n", + "70.00 1 1 0\n", + "70.50 0 0 1\n", + "71.00 2 0 0\n", + "74.00 0 0 1\n", + "80.00 1 0 0\n", + "\n", + "[88 rows x 3 columns]" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(index=titanic.Age, columns=titanic.Pclass)" + ] + }, + { + "cell_type": "markdown", + "id": "differential-solomon", + "metadata": {}, + "source": [ + "## Saving data\n", + "\n", + "To **csv** or **tsv** files:\n", + "\n", + "```python\n", + "df.to_csv(<path to file>, sep='\\t', index=False)\n", + "```\n", + "\n", + "If needed, you can even produce **xlsx** files with multiple spreadsheets:\n", + "\n", + "```python\n", + "with pd.ExcelWriter(\"multi_sheet_excel.xlsx\") as writer:\n", + " df.to_excel(writer, sheet_name=\"stocks1\")\n", + " df2.to_excel(writer, sheet_name=\"stocks2\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "photographic-citizen", + "metadata": {}, + "source": [ + "# Teasing\n", + "\n", + "pandas uses matplotlib to display graphics" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "martial-lover", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<AxesSubplot:>" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAARC0lEQVR4nO3dX4xcZ33G8e+PlDbGC7HdJCvXQTWoVgrKNgFvQ2iqajemyCQI56KJggAZKZUvCjRUrqjTSq24qOqLBtELVNUCGqulWVJIGiuVgGjJFKWqCmsI2KmThhLLxEltSG3DuhHF4deLPYuHze78n53z2t+PtJo5Z8+ZeTI7fvLuO+ecjcxEklSeV4w6gCSpNxa4JBXKApekQlngklQoC1ySCvVzq/lkl19+eW7evLnr/c6ePcvatWsHH6hP5upOXXNBfbOZqzt1zQX9ZTt48OD3M/OKl30jM1fta+vWrdmLRx99tKf9hs1c3alrrsz6ZjNXd+qaK7O/bMBcLtOpTqFIUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1KhVvVUeq2ezXv+ueX3j+69ZZWSSBoWR+CSVCgLXJIKZYFLUqEscEkqVEcfYkbEUeCHwEvAucycjIgNwGeBzcBR4PbMPDWcmJKkpboZgU9n5nWZOVkt7wFmM3MLMFstS5JWST9TKDuA/dX9/cCtfaeRJHUsFv7YQ5uNIp4BTgEJ/E1m7ouI05m5rmmbU5m5fpl9dwG7AMbHx7fOzMx0HXJ+fp6xsbGu9xu2Oud65sxLLbeZ2HTZKqU5r66vF9Q3m7m6U9dc0F+26enpg02zHz/V6Yk8N2bmcxFxJfBIRDzZ6RNn5j5gH8Dk5GROTU11uutPNRoNetlv2Oqc657Hzrbc5uh7plYnTJO6vl5Q32zm6k5dc8FwsnU0hZKZz1W3J4EHgeuBExGxEaC6PTnQZJKkltoWeESsjYhXL94H3g4cBg4AO6vNdgIPDSukJOnlOplCGQcejIjF7f8hM78QEV8D7o+IO4FjwG3DiylJWqptgWfmd4Brl1n/ArBtGKEkSe15JqYkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlH8TUwO30t/j3D1xjqnVjSJd0ByBS1KhLHBJKpQFLkmFcg78IrXSPDXA0b23rGISSb1yBC5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKa6Goa62uoyJp9TgCl6RCWeCSVCgLXJIK5Ry4XsY5bqkMjsAlqVAWuCQVygKXpEJ1PAceEZcAc8DxzHxnRGwAPgtsBo4Ct2fmqWGElBb5tzyl87oZgd8FHGla3gPMZuYWYLZaliStko4KPCKuAm4BPtm0egewv7q/H7h1oMkkSS11OgL/OPAR4CdN68Yz83mA6vbKwUaTJLUSmdl6g4h3Ajdn5u9FxBTwh9Uc+OnMXNe03anMXL/M/ruAXQDj4+NbZ2Zmug45Pz/P2NhY1/sN26hzHTp+Ztn142vgxIurHKYD42vgyg2X9fUYK/03A0xs6v2xR/2zXIm5ulPXXNBftunp6YOZObl0fScF/hfA+4BzwKXAa4AHgF8HpjLz+YjYCDQy8+pWjzU5OZlzc3Ndh280GkxNTXW937CNOtdKH+jtnjjHPYfqd47W7olzfOg9O/p6jGF9iDnqn+VKzNWduuaC/rJFxLIF3nYKJTPvzsyrMnMzcAfw5cx8L3AA2FltthN4qKdkkqSe9DNM2wvcHxF3AseA2wYTSYs8pV1SK10VeGY2gEZ1/wVg2+AjSZI64ZmYklQoC1ySCmWBS1KhLHBJKpQFLkmFssAlqVD1O11PF7R2x7Z7SVipc47AJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1KhLHBJKpQFLkmFssAlqVAWuCQVygK
XpEL5NzFVK+3+Zqak8xyBS1KhLHBJKpQFLkmFssAlqVBtCzwiLo2Ir0bENyPiiYj4aLV+Q0Q8EhFPV7frhx9XkrSokxH4j4CbMvNa4Dpge0TcAOwBZjNzCzBbLUuSVknbAs8F89XiK6uvBHYA+6v1+4FbhxFQkrS8yMz2G0VcAhwEfgX4RGb+UUSczsx1TducysyXTaNExC5gF8D4+PjWmZmZrkPOz88zNjbW9X7DNuxch46f6Wm/8TVw4sUBhxmAYeea2HRZz/terO+xXpmre/1km56ePpiZk0vXd1TgP904Yh3wIPAh4LFOCrzZ5ORkzs3Ndfx8ixqNBlNTU13vN2zDztXrSS27J85xz6H6naM17FxH997S874X63usV+bqXj/ZImLZAu/qKJTMPA00gO3AiYjYWD34RuBkT8kkST3p5CiUK6qRNxGxBngb8CRwANhZbbYTeGhIGSVJy+jk99mNwP5qHvwVwP2Z+XBE/Btwf0TcCRwDbhtiTknSEm0LPDO/BbxpmfUvANuGEUqS1J5nYkpSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RC1e98a2lIWl2a4N7ta1cxiTQYjsAlqVAWuCQVygKXpEJZ4JJUKAtckgplgUtSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCFXMtlFbXsQA4uveWVUoiSfXgCFySCmWBS1KhLHBJKlQxc+AXonbz+pLUiiNwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIK5XHgEnDo+Bne3+K4fK+1ozpqOwKPiNdGxKMRcSQinoiIu6r1GyLikYh4urpdP/y4kqRFnUyhnAN2Z+YbgBuAD0TEG4E9wGxmbgFmq2VJ0ippW+CZ+Xxmfr26/0PgCLAJ2AHsrzbbD9w6pIySpGVEZna+ccRm4CvANcCxzFzX9L1TmfmyaZSI2AXsAhgfH986MzPTdcj5+XmeOfNSy20mNl3W9eP2a35+nrGxsZ73P3T8zADTnDe+Bk68OJSH7suwc7V7D7R6vdtlG8X7C/p/jw2LubrXT7bp6emDmTm5dH3HBR4RY8C/AH+emQ9ExOlOCrzZ5ORkzs3NdZccaDQavP8LZ1tuM4oPmRqNBlNTUz3vP6yLWe2eOMc9h+r3+fSwc7V7D7R6vdtlG9WHmP2+x4bFXN3rJ1tELFvgHR1GGBGvBD4PfCYzH6hWn4iIjdX3NwIne0omSepJJ0ehBPAp4EhmfqzpWweAndX9ncBDg48nSVpJJ7/P3gi8DzgUEY9X6/4Y2AvcHxF3AseA24aSUJK0rLYFnpmPAbHCt7cNNo4kqVOeSi9JhbLAJalQ9TvWTOpRXf/GaLtcXmdFvXIELkmFssAlqVAWuCQVyjlwaQDqOv+uC5sjcEkqlAUuSYWywCWpUBfMHHirOUiPs1WdtXrv3rt97SomUWkcgUtSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVKgL5jhwaZi81onqyBG4JBXKApekQlngklQo58ClGjt0/Azv9zo/WoEjcEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1Kh2hZ4RHw6Ik5GxOGmdRsi4pGIeLq6XT/cmJKkpToZgd8LbF+ybg8wm5lbgNlqWZK0itoWeGZ+BfifJat3APur+/uBWwcbS5LUTmRm+40iNgMPZ+Y11fLpzFzX9P1TmbnsNEpE7AJ2AYyPj2+dmZnpOuT8/DzPnHmp6/0WTWy6rOd9YeF6FMsZXwNXbuj9sVd63H6Nr4ETLw7loftS11xQ32ztcvX73u7V/Pw8Y2NjI3nuVuqaC/rLNj09fTAzJ5euH/rFrDJzH7APYHJyMqemprp+jEajwT2Pne05w9H3dP+czVa6mNDuiXPc3sN/T7vH7dfuiXPcc6h+1ymray6ob7Z2ufp9b/eq0WjQy7/lYatrLhhOtl6PQjkRERsBqtuTg4skSepErwV+ANhZ3d8JPDSYOJKkTrX
9nTEi7gOmgMsj4lngz4C9wP0RcSdwDLhtmCElLa/d3+r0euEXtrYFnpnvXuFb2wacRZLUBc/ElKRCWeCSVKj6HTd1gWk3RylJvXIELkmFssAlqVAWuCQV6qKYA3ceWherft77rY4hP3T8TMtLQXj8+epwBC5JhbLAJalQFrgkFeqimAOX1L1W8+e7J1YxiFbkCFySCmWBS1KhLHBJKpRz4H3yGHNJo+IIXJIKZYFLUqEscEkqlHPgkgZuWNdg0c9yBC5JhbLAJalQFrgkFco5cEkXjIvtOuWOwCWpUBa4JBXKApekQjkHLqkoXqf8PEfgklQoC1ySCmWBS1KhnAOXdNFod42Wfo4Tb/fY925f2/Njr6SvEXhEbI+IpyLi2xGxZ1ChJEnt9VzgEXEJ8AngHcAbgXdHxBsHFUyS1Fo/I/DrgW9n5ncy8/+AGWDHYGJJktqJzOxtx4jfAbZn5u9Wy+8D3pKZH1yy3S5gV7V4NfBUD093OfD9noIOl7m6U9dcUN9s5upOXXNBf9l+OTOvWLqynw8xY5l1L/u/QWbuA/b18TxExFxmTvbzGMNgru7UNRfUN5u5ulPXXDCcbP1MoTwLvLZp+Srguf7iSJI61U+Bfw3YEhGvi4ifB+4ADgwmliSpnZ6nUDLzXER8EPgicAnw6cx8YmDJflZfUzBDZK7u1DUX1DebubpT11wwhGw9f4gpSRotT6WXpEJZ4JJUqFoXeJ1O1Y+IT0fEyYg43LRuQ0Q8EhFPV7frVznTayPi0Yg4EhFPRMRddchVZbg0Ir4aEd+ssn20LtmqHJdExDci4uG65IqIoxFxKCIej4i5GuVaFxGfi4gnq/faW2uS6+rqtVr8+kFEfLgm2f6get8fjoj7qn8PA89V2wKv4an69wLbl6zbA8xm5hZgtlpeTeeA3Zn5BuAG4APVazTqXAA/Am7KzGuB64DtEXFDTbIB3AUcaVquS67pzLyu6XjhOuT6K+ALmfmrwLUsvG4jz5WZT1Wv1XXAVuB/gQdHnS0iNgG/D0xm5jUsHORxx1ByZWYtv4C3Al9sWr4buHvEmTYDh5uWnwI2Vvc3Ak+NON9DwG/XMNergK8Db6lDNhbOWZgFbgIersvPEjgKXL5k3UhzAa8BnqE64KEuuZbJ+XbgX+uQDdgEfBfYwMKRfg9X+Qaeq7YjcM6/CIuerdbVyXhmPg9Q3V45qiARsRl4E/DvdclVTVM8DpwEHsnMumT7OPAR4CdN6+qQK4EvRcTB6hIUdcj1euB7wN9WU06fjIi1Nci11B3AfdX9kWbLzOPAXwLHgOeBM5n5pWHkqnOBd3SqviAixoDPAx/OzB+MOs+izHwpF369vQq4PiKuGXEkIuKdwMnMPDjqLMu4MTPfzMK04Qci4rdGHYiFEeSbgb/OzDcBZxnd9NKyqhMJ3wX846izAFRz2zuA1wG/BKyNiPcO47nqXOAlnKp/IiI2AlS3J1c7QES8koXy/kxmPlCXXM0y8zTQYOEzhFFnuxF4V0QcZeEKmjdFxN/XIBeZ+Vx1e5KFudzra5DrWeDZ6rcngM+xUOijztXsHcDXM/NEtTzqbG8DnsnM72Xmj4EHgN8YRq46F3gJp+ofAHZW93eyMAe9aiIigE8BRzLzY3XJVWW7IiLWVffXsPCmfnLU2TLz7sy8KjM3s/Ce+nJmvnfUuSJibUS8evE+C3Omh0edKzP/G/huRFxdrdoG/Meocy3xbs5Pn8Dosx0DboiIV1X/Rrex8MHv4HON8oOHDj4MuBn4T+C/gD8ZcZb7WJjP+jELo5I7gV9k4cOwp6vbDauc6TdZmFb6FvB49XXzqHNV2X4N+EaV7TDwp9X6kWdryjjF+Q8xR/2zfD3wzerricX3+6hzVRmuA+aqn+U/AevrkKvK9irgBeCypnUjzwZ8lIUBy2Hg74BfGEYuT6WXpELVeQpFktSCBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIK9f+G8SKnugrwJQA
AAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "titanic.Age.hist(bins=40)" + ] + }, + { + "cell_type": "markdown", + "id": "pointed-transport", + "metadata": {}, + "source": [ + "# And so much more ...\n", + "\n", + "To name just a few:\n", + "\n", + "- [pivot_table](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.pivot_table.html)\n", + "- [apply](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html)\n", + "- [map](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.map.html)\n", + "- [json_normalize](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.json_normalize.html)\n", + "- [multiindex](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.MultiIndex.html)\n", + "- Combination of groupby with mean, max, aggregate or boxplots.\n", + "- ..." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:dev]", + "language": "python", + "name": "conda-env-dev-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab