{"cells":[{"cell_type":"markdown","metadata":{"id":"79rWt21wd_5Z"},"source":["The decision tree algorithm can be used for both classification and regression and has the advantage of not assuming a linear model. Decision trees are usually easy to represent visually, which makes it easy to understand how the model actually works."]},{"cell_type":"markdown","metadata":{"id":"e2ipLHcwd_5Z"},"source":["### Geometric Intuition\n",""]},{"cell_type":"code","execution_count":1,"metadata":{"id":"eGym5xlHd_5c","executionInfo":{"status":"ok","timestamp":1711469960227,"user_tz":240,"elapsed":2289,"user":{"displayName":"Audrey Wang","userId":"01727017967557536978"}}},"outputs":[],"source":["# import necessary packages\n","import pandas as pd\n","import numpy as np\n","from sklearn.metrics import accuracy_score\n","from sklearn.model_selection import train_test_split\n","from sklearn.tree import DecisionTreeClassifier\n","from sklearn.neighbors import KNeighborsClassifier\n","from sklearn import tree\n","from sklearn import datasets"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"_gf8r9z8d_5f","outputId":"8ae8e7db-bfeb-4d98-9988-ef1877e95513","colab":{"base_uri":"https://localhost:8080/","height":270},"executionInfo":{"status":"ok","timestamp":1711470614179,"user_tz":240,"elapsed":521,"user":{"displayName":"Audrey Wang","userId":"01727017967557536978"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n","0 842302 M 17.99 10.38 122.80 1001.0 \n","1 842517 M 20.57 17.77 132.90 1326.0 \n","2 84300903 M 19.69 21.25 130.00 1203.0 \n","3 84348301 M 11.42 20.38 77.58 386.1 \n","4 84358402 M 20.29 14.34 135.10 1297.0 \n","\n"," smoothness_mean compactness_mean concavity_mean concave points_mean \\\n","0 0.11840 0.27760 0.3001 0.14710 \n","1 0.08474 0.07864 0.0869 0.07017 \n","2 0.10960 0.15990 0.1974 0.12790 \n","3 0.14250 0.28390 0.2414 0.10520 \n","4 0.10030 0.13280 0.1980 
0.10430 \n","\n"," ... texture_worst perimeter_worst area_worst smoothness_worst \\\n","0 ... 17.33 184.60 2019.0 0.1622 \n","1 ... 23.41 158.80 1956.0 0.1238 \n","2 ... 25.53 152.50 1709.0 0.1444 \n","3 ... 26.50 98.87 567.7 0.2098 \n","4 ... 16.67 152.20 1575.0 0.1374 \n","\n"," compactness_worst concavity_worst concave points_worst symmetry_worst \\\n","0 0.6656 0.7119 0.2654 0.4601 \n","1 0.1866 0.2416 0.1860 0.2750 \n","2 0.4245 0.4504 0.2430 0.3613 \n","3 0.8663 0.6869 0.2575 0.6638 \n","4 0.2050 0.4000 0.1625 0.2364 \n","\n"," fractal_dimension_worst Unnamed: 32 \n","0 0.11890 NaN \n","1 0.08902 NaN \n","2 0.08758 NaN \n","3 0.17300 NaN \n","4 0.07678 NaN \n","\n","[5 rows x 33 columns]"],"text/html":["\n","
\n"," | id | \n","diagnosis | \n","radius_mean | \n","texture_mean | \n","perimeter_mean | \n","area_mean | \n","smoothness_mean | \n","compactness_mean | \n","concavity_mean | \n","concave points_mean | \n","... | \n","texture_worst | \n","perimeter_worst | \n","area_worst | \n","smoothness_worst | \n","compactness_worst | \n","concavity_worst | \n","concave points_worst | \n","symmetry_worst | \n","fractal_dimension_worst | \n","Unnamed: 32 | \n","
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n","842302 | \n","M | \n","17.99 | \n","10.38 | \n","122.80 | \n","1001.0 | \n","0.11840 | \n","0.27760 | \n","0.3001 | \n","0.14710 | \n","... | \n","17.33 | \n","184.60 | \n","2019.0 | \n","0.1622 | \n","0.6656 | \n","0.7119 | \n","0.2654 | \n","0.4601 | \n","0.11890 | \n","NaN | \n","
1 | \n","842517 | \n","M | \n","20.57 | \n","17.77 | \n","132.90 | \n","1326.0 | \n","0.08474 | \n","0.07864 | \n","0.0869 | \n","0.07017 | \n","... | \n","23.41 | \n","158.80 | \n","1956.0 | \n","0.1238 | \n","0.1866 | \n","0.2416 | \n","0.1860 | \n","0.2750 | \n","0.08902 | \n","NaN | \n","
2 | \n","84300903 | \n","M | \n","19.69 | \n","21.25 | \n","130.00 | \n","1203.0 | \n","0.10960 | \n","0.15990 | \n","0.1974 | \n","0.12790 | \n","... | \n","25.53 | \n","152.50 | \n","1709.0 | \n","0.1444 | \n","0.4245 | \n","0.4504 | \n","0.2430 | \n","0.3613 | \n","0.08758 | \n","NaN | \n","
3 | \n","84348301 | \n","M | \n","11.42 | \n","20.38 | \n","77.58 | \n","386.1 | \n","0.14250 | \n","0.28390 | \n","0.2414 | \n","0.10520 | \n","... | \n","26.50 | \n","98.87 | \n","567.7 | \n","0.2098 | \n","0.8663 | \n","0.6869 | \n","0.2575 | \n","0.6638 | \n","0.17300 | \n","NaN | \n","
4 | \n","84358402 | \n","M | \n","20.29 | \n","14.34 | \n","135.10 | \n","1297.0 | \n","0.10030 | \n","0.13280 | \n","0.1980 | \n","0.10430 | \n","... | \n","16.67 | \n","152.20 | \n","1575.0 | \n","0.1374 | \n","0.2050 | \n","0.4000 | \n","0.1625 | \n","0.2364 | \n","0.07678 | \n","NaN | \n","
5 rows × 33 columns
\n","