0% found this document useful (0 votes)
16 views

4 Dataframe Operations - Ipynb

Uploaded by

Nidhi H Baraker
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views

4 Dataframe Operations - Ipynb

Uploaded by

Nidhi H Baraker
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 85

{

"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Operations"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random\n",
"import numpy as np\n",
"import matplotlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09\n",
"R00 79 19 21 99 35 59 44 25 75 58\n",
"R01 25 39 89 66 9 41 6 69 63 3\n",
"R02 37 64 31 69 61 97 5 11 76 57\n",
"R03 74 61 100 6 58 80 95 50 15 51\n",
"R04 79 60 83 85 16 5 16 69 5 20\n",
"R05 45 26 73 73 100 60 21 19 95 12\n",
"R06 12 29 18 98 62 68 92 29 74 96\n",
"R07 36 32 22 4 66 25 63 51 59 14\n",
"R08 55 53 89 13 84 87 74 3 2 64\n",
"R09 46 74 36 54 21 12 68 33 80 25"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# load the sample frame, then relabel rows as R00, R01, ... and columns as C00, C01, ...\n",
"df = pd.read_json('./data/sampledf.json')\n",
"df.index = [f'R{row:02d}' for row in range(df.shape[0])]\n",
"df.columns = [f'C{col:02d}' for col in range(df.shape[1])]\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Adding and deleting Series in a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>18</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 71\n",
"R01 25 39 89 66 9 41 6 69 63 3 18\n",
"R02 37 64 31 69 61 97 5 11 76 57 89\n",
"R03 74 61 100 6 58 80 95 50 15 51 18\n",
"R04 79 60 83 85 16 5 16 69 5 20 1\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 74\n",
"R07 36 32 22 4 66 25 63 51 59 14 67\n",
"R08 55 53 89 13 84 87 74 3 2 64 5\n",
"R09 46 74 36 54 21 12 68 33 80 25 18"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# a plain list is assigned positionally: one random integer in [1, 100] per existing row\n",
"df['C10'] = [random.randint(1, 100) for _ in df.index]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 63\n",
"1 99\n",
"2 38\n",
"3 73\n",
"4 99\n",
"5 77\n",
"6 97\n",
"7 11\n",
"8 26\n",
"9 83\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# caveat when adding a Series: it is built here with its own default 0..n-1 integer index\n",
"new_series = pd.Series([random.randint(1, 100) for _ in range(len(df))])\n",
"new_series"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 NaN\n",
"R01 25 39 89 66 9 41 6 69 63 3 NaN\n",
"R02 37 64 31 69 61 97 5 11 76 57 NaN\n",
"R03 74 61 100 6 58 80 95 50 15 51 NaN\n",
"R04 79 60 83 85 16 5 16 69 5 20 NaN\n",
"R05 45 26 73 73 100 60 21 19 95 12 NaN\n",
"R06 12 29 18 98 62 68 92 29 74 96 NaN\n",
"R07 36 32 22 4 66 25 63 51 59 14 NaN\n",
"R08 55 53 89 13 84 87 74 3 2 64 NaN\n",
"R09 46 74 36 54 21 12 68 33 80 25 NaN"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C10'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"R00 63\n",
"R01 99\n",
"R02 38\n",
"R03 73\n",
"R04 99\n",
"R05 77\n",
"R06 97\n",
"R07 11\n",
"R08 26\n",
"R09 83\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# adding a new Series: mind the index of the DF\n",
"new_series.index = df.index\n",
"new_series"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C10'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" <th>C11</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11\n",
"R00 79 19 21 99 35 59 44 25 75 58 63 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83 83"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['C11'] = new_series\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"del df['C11']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "KeyError",
"evalue": "'C12'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method,
tolerance)\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0;32mtry\
u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\
n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \
u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\
u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\
u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\
n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \
u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\
u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/_libs/
index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\
u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/_libs/
index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\
u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \
u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\
u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \
u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\
u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'C12'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback
(most recent call last)",

"\u001b[0;32m/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/39014
31452.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m---->
1\u001b[0;31m \u001b[0;32mdel\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\
u001b[0;34m'C12'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
generic.py\u001b[0m in \u001b[0;36m__delitem__\u001b[0;34m(self, key)\u001b[0m\n\
u001b[1;32m 3961\u001b[0m \u001b[0;31m# there was no match, this call
should raise the appropriate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\
u001b[0m\n\u001b[1;32m 3962\u001b[0m \u001b[0;31m# exception:\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3963\
u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\
u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\
u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\
u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\
u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\
u001b[0m\u001b[1;32m 3964\u001b[0m \u001b[0mself\u001b[0m\
u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\
u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[0;34m.\u001b[0m\
u001b[0midelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m)\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3965\
u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/pyds/lib/python3.9/site-packages/pandas/core/
indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method,
tolerance)\u001b[0m\n\u001b[1;32m 3361\u001b[0m \
u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\
u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\
u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\
u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3362\u001b[0m
\u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \
u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m \
u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\
u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m
3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3365\
u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\
u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \
u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\
u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\
u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\
u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'C12'"
]
}
],
"source": [
"del df['C12']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# avoid a KeyError when the column may not exist: target columns explicitly\n",
"# (without axis/columns, drop() looks for 'C12' among the row labels)\n",
"df.drop(columns=['C12'], errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C03</th>\n",
" <th>C05</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>99</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>66</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>69</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>6</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>85</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>73</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>98</td>\n",
" <td>68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>4</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>13</td>\n",
" <td>87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>54</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C03 C05\n",
"R00 99 59\n",
"R01 66 41\n",
"R02 69 97\n",
"R03 6 80\n",
"R04 85 5\n",
"R05 73 60\n",
"R06 98 68\n",
"R07 4 25\n",
"R08 13 87\n",
"R09 54 12"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# getting a subset is simpler than deleting many columns\n",
"dfs = df.loc[:, ['C03', 'C05']]\n",
"dfs"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# one random integer in [1, 100] per row of df\n",
"dfs['C04'] = [random.randint(1, 100) for _ in df.index]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C03</th>\n",
" <th>C05</th>\n",
" <th>C04</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>99</td>\n",
" <td>59</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>66</td>\n",
" <td>41</td>\n",
" <td>89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>69</td>\n",
" <td>97</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>6</td>\n",
" <td>80</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>85</td>\n",
" <td>5</td>\n",
" <td>79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>73</td>\n",
" <td>60</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>98</td>\n",
" <td>68</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>4</td>\n",
" <td>25</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>13</td>\n",
" <td>87</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>54</td>\n",
" <td>12</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C03 C05 C04\n",
"R00 99 59 47\n",
"R01 66 41 89\n",
"R02 69 97 67\n",
"R03 6 80 8\n",
"R04 85 5 79\n",
"R05 73 60 74\n",
"R06 98 68 23\n",
"R07 4 25 94\n",
"R08 13 87 97\n",
"R09 54 12 15"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfs"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C00</th>\n",
" <th>C01</th>\n",
" <th>C02</th>\n",
" <th>C03</th>\n",
" <th>C04</th>\n",
" <th>C05</th>\n",
" <th>C06</th>\n",
" <th>C07</th>\n",
" <th>C08</th>\n",
" <th>C09</th>\n",
" <th>C10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R00</th>\n",
" <td>79</td>\n",
" <td>19</td>\n",
" <td>21</td>\n",
" <td>99</td>\n",
" <td>35</td>\n",
" <td>59</td>\n",
" <td>44</td>\n",
" <td>25</td>\n",
" <td>75</td>\n",
" <td>58</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R01</th>\n",
" <td>25</td>\n",
" <td>39</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>69</td>\n",
" <td>63</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R02</th>\n",
" <td>37</td>\n",
" <td>64</td>\n",
" <td>31</td>\n",
" <td>69</td>\n",
" <td>61</td>\n",
" <td>97</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>76</td>\n",
" <td>57</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R03</th>\n",
" <td>74</td>\n",
" <td>61</td>\n",
" <td>100</td>\n",
" <td>6</td>\n",
" <td>58</td>\n",
" <td>80</td>\n",
" <td>95</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R04</th>\n",
" <td>79</td>\n",
" <td>60</td>\n",
" <td>83</td>\n",
" <td>85</td>\n",
" <td>16</td>\n",
" <td>5</td>\n",
" <td>16</td>\n",
" <td>69</td>\n",
" <td>5</td>\n",
" <td>20</td>\n",
" <td>99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R05</th>\n",
" <td>45</td>\n",
" <td>26</td>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>100</td>\n",
" <td>60</td>\n",
" <td>21</td>\n",
" <td>19</td>\n",
" <td>95</td>\n",
" <td>12</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R06</th>\n",
" <td>12</td>\n",
" <td>29</td>\n",
" <td>18</td>\n",
" <td>98</td>\n",
" <td>62</td>\n",
" <td>68</td>\n",
" <td>92</td>\n",
" <td>29</td>\n",
" <td>74</td>\n",
" <td>96</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R07</th>\n",
" <td>36</td>\n",
" <td>32</td>\n",
" <td>22</td>\n",
" <td>4</td>\n",
" <td>66</td>\n",
" <td>25</td>\n",
" <td>63</td>\n",
" <td>51</td>\n",
" <td>59</td>\n",
" <td>14</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R08</th>\n",
" <td>55</td>\n",
" <td>53</td>\n",
" <td>89</td>\n",
" <td>13</td>\n",
" <td>84</td>\n",
" <td>87</td>\n",
" <td>74</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>64</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R09</th>\n",
" <td>46</td>\n",
" <td>74</td>\n",
" <td>36</td>\n",
" <td>54</td>\n",
" <td>21</td>\n",
" <td>12</td>\n",
" <td>68</td>\n",
" <td>33</td>\n",
" <td>80</td>\n",
" <td>25</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C00 C01 C02 C03 C04 C05 C06 C07 C08 C09 C10\n",
"R00 79 19 21 99 35 59 44 25 75 58 63\n",
"R01 25 39 89 66 9 41 6 69 63 3 99\n",
"R02 37 64 31 69 61 97 5 11 76 57 38\n",
"R03 74 61 100 6 58 80 95 50 15 51 73\n",
"R04 79 60 83 85 16 5 16 69 5 20 99\n",
"R05 45 26 73 73 100 60 21 19 95 12 77\n",
"R06 12 29 18 98 62 68 92 29 74 96 97\n",
"R07 36 32 22 4 66 25 63 51 59 14 11\n",
"R08 55 53 89 13 84 87 74 3 2 64 26\n",
"R09 46 74 36 54 21 12 68 33 80 25 83"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mangle your data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**recall: NumPy Broadcasting**"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.61903208],\n",
" [0.76795317],\n",
" [0.26790371]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.random.rand(3,1)\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.58635195],\n",
" [0.64208162],\n",
" [0.12870538]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = np.random.rand(3,1)\n",
"b"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.20538403],\n",
" [1.41003479],\n",
" [0.39660909]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.03268013],\n",
" [0.12587155],\n",
" [0.13919833]])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a - b"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.36297067],\n",
" [0.49308861],\n",
" [0.03448065]])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a * b"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.05573466],\n",
" [1.19603669],\n",
" [2.08152693]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a / b"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[7.61903208],\n",
" [7.76795317],\n",
" [7.26790371]])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + 7"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.],\n",
" [0.],\n",
" [0.]])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b // 2"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[False],\n",
" [False],\n",
" [False]])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a % 2 == 0"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2.47612831, 3.09516039],\n",
" [3.07181268, 3.83976585],\n",
" [1.07161484, 1.33951855]])"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a * [4, 5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Broadcasting in Pandas"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice \n",
"0 Star Wars 27 11.81 \n",
"1 PlayStation 1 284.71 \n",
"2 banana 49 10.00 \n",
"3 Thriller record 48 16.77 \n",
"4 Harry Potter book 4 5.65 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data = pd.read_json('./data/blooth_sales_data.json',\n",
" convert_dates=['birthday', 'orderdate']\n",
" )\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover \n",
"0 Star Wars 27 11.81 318.87 \n",
"1 PlayStation 1 284.71 284.71 \n",
"2 banana 49 10.00 490.00 \n",
"3 Thriller record 48 16.77 804.96 \n",
"4 Harry Potter book 4 5.65 22.60 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'] = sales_data['unitprice'] * sales_data['units']\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"59915.64764605545"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"class A:\n",
"    \"\"\"Toy value holder showing that `x + y` dispatches to `x.__add__(y)`.\"\"\"\n",
"\n",
"    def __init__(self, a=0):\n",
"        self.a = a\n",
"\n",
"    def __add__(self, o):\n",
"        \"\"\"Return the plain sum of both wrapped values (not a new `A`).\"\"\"\n",
"        if not hasattr(o, 'a'):\n",
"            # unsupported operand: let Python try o.__radd__ or raise TypeError\n",
"            return NotImplemented\n",
"        return self.a + o.a"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"a = A(10)\n",
"b = A(20)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"units 25.808102\n",
"unitprice 2329.681343\n",
"turnover 59915.647646\n",
"dtype: float64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# numeric_only=True: restrict the reduction to the numeric columns explicitly\n",
"# instead of relying on pandas silently dropping text/datetime columns\n",
"sales_data.mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"140502193.73000002"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"532.14"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['turnover'].median()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 2345 entries, 0 to 2344\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 2345 non-null object \n",
" 1 birthday 2345 non-null datetime64[ns]\n",
" 2 customer 2345 non-null object \n",
" 3 orderdate 2345 non-null datetime64[ns]\n",
" 4 product 2345 non-null object \n",
" 5 units 2345 non-null int64 \n",
" 6 unitprice 2345 non-null float64 \n",
" 7 turnover 2345 non-null float64 \n",
"dtypes: datetime64[ns](2), float64(2), int64(1), object(3)\n",
"memory usage: 146.7+ KB\n"
]
}
],
"source": [
"sales_data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate the mean of a numeric column (e.g. `turnover`) without using `.mean()`"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# your code here\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Find the oldest customer (earliest `birthday`)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# your code here\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Functions with **`.map(), .apply(), .applymap()`**"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year \n",
"0 Star Wars 27 11.81 318.87 2016 \n",
"1 PlayStation 1 284.71 284.71 2016 \n",
"2 banana 49 10.00 490.00 2016 \n",
"3 Thriller record 48 16.77 804.96 2016 \n",
"4 Harry Potter book 4 5.65 22.60 2016 "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# map for Series\n",
"sales_data['year'] = sales_data['orderdate'].map(lambda x: x.year)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year month \n",
"0 Star Wars 27 11.81 318.87 2016 10 \n",
"1 PlayStation 1 284.71 284.71 2016 9 \n",
"2 banana 49 10.00 490.00 2016 10 \n",
"3 Thriller record 48 16.77 804.96 2016 10 \n",
"4 Harry Potter book 4 5.65 22.60 2016 10 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# map for Series\n",
"sales_data['month'] = sales_data['orderdate'].map(lambda x: x.month)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# build a 'YYYY-MM' label by concatenating two string-converted columns\n",
"sales_data['year-month'] = (\n",
"    sales_data['year'].map(str)\n",
"    + sales_data['month'].map(lambda m: f\"-{m:02d}\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year month year-month \n",
"0 Star Wars 27 11.81 318.87 2016 10 2016-10 \n",
"1 PlayStation 1 284.71 284.71 2016 9 2016-09 \n",
"2 banana 49 10.00 490.00 2016 10 2016-10 \n",
"3 Thriller record 48 16.77 804.96 2016 10 2016-10 \n",
"4 Harry Potter book 4 5.65 22.60 2016 10 2016-10 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11.81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>284.71</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10.00</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16.77</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.65</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 11.81 27\n",
"1 284.71 1\n",
"2 10.00 49\n",
"3 16.77 48\n",
"4 5.65 4"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data[['unitprice', 'units']].head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>47.24</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1138.84</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.00</td>\n",
" <td>196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>67.08</td>\n",
" <td>192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22.60</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 47.24 108\n",
"1 1138.84 4\n",
"2 40.00 196\n",
"3 67.08 192\n",
"4 22.60 16"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# applymap: the function is called once per element of the DataFrame;\n",
"# here every value is multiplied by 4 and rounded to 2 decimals.\n",
"sales_data[['unitprice', 'units']].applymap(lambda value: np.round(4 * value, 2)).head(5)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>47.24</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1138.84</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.00</td>\n",
" <td>196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>67.08</td>\n",
" <td>192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22.60</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 47.24 108\n",
"1 1138.84 4\n",
"2 40.00 196\n",
"3 67.08 192\n",
"4 22.60 16"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# apply: the function receives a whole column at a time (default axis=0);\n",
"# multiplication and np.round work on the full column in one call.\n",
"sales_data[['unitprice', 'units']].apply(lambda column: np.round(4 * column, 2)).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Differences between **`.map()`, `.apply()` and `.applymap()`**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.map()`**\n",
" >apply a function to each element of a Series (e.g. a single column of a DataFrame)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 11\n",
"1 284\n",
"2 10\n",
"3 16\n",
"4 5\n",
"Name: unitprice, dtype: int64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series.map: int() is applied to every unit price, dropping the decimals\n",
"sales_data['unitprice'].map(int).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.apply()`**\n",
" > apply a function along an axis of the DataFrame, i.e. to whole columns (`axis=0`, the default) or to whole rows (`axis=1`)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"e.g. sum up two cells of each row (here: unitprice + units)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 38.81\n",
"1 285.71\n",
"2 59.00\n",
"3 64.77\n",
"4 9.65\n",
"dtype: float64"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis=1 hands each row to sum(); without it apply works per column (axis=0)\n",
"sales_data[['unitprice', 'units']].apply(sum, axis=1).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" * **`.applymap()`** \n",
" > apply a function on each element of the DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unitprice</th>\n",
" <th>units</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>284</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unitprice units\n",
"0 11 27\n",
"1 284 1\n",
"2 10 49\n",
"3 16 48\n",
"4 5 4"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# applymap: int() is applied to every single cell of both columns\n",
"sales_data[['unitprice', 'units']].applymap(int).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Remember: these methods return a new object; the original DataFrame is not altered unless the result is assigned back."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>11.81</td>\n",
" <td>318.87</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>284.71</td>\n",
" <td>284.71</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>10.00</td>\n",
" <td>490.00</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>16.77</td>\n",
" <td>804.96</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>5.65</td>\n",
" <td>22.60</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 11.81 318.87 2016 10 2016-10 \
n",
"1 PlayStation 1 284.71 284.71 2016 9 2016-09 \
n",
"2 banana 49 10.00 490.00 2016 10 2016-10 \
n",
"3 Thriller record 48 16.77 804.96 2016 10 2016-10 \
n",
"4 Harry Potter book 4 5.65 22.60 2016 10 2016-10 "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# map returns a new Series, so the doubled prices are written back to the column.\n",
"# NOTE: not idempotent - every re-run of this cell doubles the column again.\n",
"sales_data['unitprice'] = sales_data['unitprice'].map(lambda price: price * 2)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2345.000000</td>\n",
" <td>2345.000000</td>\n",
" <td>2.345000e+03</td>\n",
" <td>2345.0</td>\n",
" <td>2345.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>25.808102</td>\n",
" <td>4659.362687</td>\n",
" <td>5.991565e+04</td>\n",
" <td>2016.0</td>\n",
" <td>9.812793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>14.474110</td>\n",
" <td>13164.655228</td>\n",
" <td>1.969636e+05</td>\n",
" <td>0.0</td>\n",
" <td>0.390161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>10.020000</td>\n",
" <td>5.080000e+00</td>\n",
" <td>2016.0</td>\n",
" <td>9.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>13.000000</td>\n",
" <td>21.320000</td>\n",
" <td>2.601000e+02</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>26.000000</td>\n",
" <td>35.700000</td>\n",
" <td>5.321400e+02</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>39.000000</td>\n",
" <td>979.880000</td>\n",
" <td>1.016450e+04</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>50.000000</td>\n",
" <td>49791.260000</td>\n",
" <td>1.222270e+06</td>\n",
" <td>2016.0</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" units unitprice turnover year month\n",
"count 2345.000000 2345.000000 2.345000e+03 2345.0 2345.000000\n",
"mean 25.808102 4659.362687 5.991565e+04 2016.0 9.812793\n",
"std 14.474110 13164.655228 1.969636e+05 0.0 0.390161\n",
"min 1.000000 10.020000 5.080000e+00 2016.0 9.000000\n",
"25% 13.000000 21.320000 2.601000e+02 2016.0 10.000000\n",
"50% 26.000000 35.700000 5.321400e+02 2016.0 10.000000\n",
"75% 39.000000 979.880000 1.016450e+04 2016.0 10.000000\n",
"max 50.000000 49791.260000 1.222270e+06 2016.0 10.000000"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/16/xdrp9d8s5510rt6fwr4j6llr0000gn/T/ipykernel_42585/852887285.py:1:
FutureWarning: Treating datetime data as categorical rather than numeric in
`.describe` is deprecated and will be removed in a future version of pandas.
Specify `datetime_is_numeric=True` to silence this warning and adopt the future
behavior now.\n",
" sales_data['birthday'].describe()\n"
]
},
{
"data": {
"text/plain": [
"count 2345\n",
"unique 296\n",
"top 1998-04-20 00:00:00\n",
"freq 19\n",
"first 1952-02-07 00:00:00\n",
"last 1999-05-01 00:00:00\n",
"Name: birthday, dtype: object"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data['birthday'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cast turnover to 32-bit integers; the fractional part is dropped.\n",
"sales_data['turnover'] = sales_data['turnover'].astype('int32')\n",
"sales_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [name, birthday, customer, orderdate, product, units, unitprice,
turnover, year, month, year-month]\n",
"Index: []"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select the rows whose unitprice is missing\n",
"sales_data.loc[sales_data['unitprice'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dropna returns a new frame without rows containing missing values;\n",
"# sales_data itself is left untouched.\n",
"sales_data.dropna().head()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fillna returns a new frame with missing values replaced by 99.99;\n",
"# sales_data itself is left untouched.\n",
"sales_data.fillna(value=99.99).head()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer
orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \
n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \
n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \
n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \
n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \
n",
"\n",
" product units unitprice turnover year month year-month \
n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \
n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \
n",
"2 banana 49 20.00 490 2016 10 2016-10 \
n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \
n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sales_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>birthday</th>\n",
" <th>customer</th>\n",
" <th>orderdate</th>\n",
" <th>product</th>\n",
" <th>units</th>\n",
" <th>unitprice</th>\n",
" <th>turnover</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>year-month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ernesto</td>\n",
" <td>1974-01-07</td>\n",
" <td>Frontier Industries</td>\n",
" <td>2016-10-06 08:21:20.544568</td>\n",
" <td>Star Wars</td>\n",
" <td>27</td>\n",
" <td>23.62</td>\n",
" <td>318</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Queen</td>\n",
" <td>1986-02-05</td>\n",
" <td>Bell Telecom Limited</td>\n",
" <td>2016-09-30 08:21:20.544599</td>\n",
" <td>PlayStation</td>\n",
" <td>1</td>\n",
" <td>569.42</td>\n",
" <td>284</td>\n",
" <td>2016</td>\n",
" <td>9</td>\n",
" <td>2016-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Reid</td>\n",
" <td>1982-07-06</td>\n",
" <td>Software Co</td>\n",
" <td>2016-10-05 08:21:20.544622</td>\n",
" <td>banana</td>\n",
" <td>49</td>\n",
" <td>20.00</td>\n",
" <td>490</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Arlene</td>\n",
" <td>1971-04-12</td>\n",
" <td>Data Design Galaxy Co</td>\n",
" <td>2016-10-02 08:21:20.544643</td>\n",
" <td>Thriller record</td>\n",
" <td>48</td>\n",
" <td>33.54</td>\n",
" <td>804</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nikita</td>\n",
" <td>1984-12-14</td>\n",
" <td>Frontier Inc</td>\n",
" <td>2016-10-16 08:21:20.544666</td>\n",
" <td>Harry Potter book</td>\n",
" <td>4</td>\n",
" <td>11.30</td>\n",
" <td>22</td>\n",
" <td>2016</td>\n",
" <td>10</td>\n",
" <td>2016-10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name birthday customer orderdate \\\n",
"0 Ernesto 1974-01-07 Frontier Industries 2016-10-06 08:21:20.544568 \n",
"1 Queen 1986-02-05 Bell Telecom Limited 2016-09-30 08:21:20.544599 \n",
"2 Reid 1982-07-06 Software Co 2016-10-05 08:21:20.544622 \n",
"3 Arlene 1971-04-12 Data Design Galaxy Co 2016-10-02 08:21:20.544643 \n",
"4 Nikita 1984-12-14 Frontier Inc 2016-10-16 08:21:20.544666 \n",
"\n",
" product units unitprice turnover year month year-month \n",
"0 Star Wars 27 23.62 318 2016 10 2016-10 \n",
"1 PlayStation 1 569.42 284 2016 9 2016-09 \n",
"2 banana 49 20.00 490 2016 10 2016-10 \n",
"3 Thriller record 48 33.54 804 2016 10 2016-10 \n",
"4 Harry Potter book 4 11.30 22 2016 10 2016-10 "
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace missing values in sales_data with the placeholder 99.99 and preview the result.\n",
"# The filled frame is assigned back explicitly rather than mutated via inplace=True,\n",
"# so the data flow of this cell is visible and the call stays chainable.\n",
"# NOTE(review): fillna without a column mapping fills NaN in every column,\n",
"# not only unitprice; confirm that is intended.\n",
"sales_data = sales_data.fillna(99.99)\n",
"sales_data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}

You might also like