{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":174},"id":"KYos1DyWzRyA","executionInfo":{"status":"ok","timestamp":1710341555968,"user_tz":240,"elapsed":1617,"user":{"displayName":"Deniz Boloni-Turgut","userId":"05746704740663907513"}},"outputId":"4fbfc1ae-7f51-4493-bb55-a931b1a04871","collapsed":true},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Sepal Length Sepal Width Petal Length Petal Width\n","Sepal Length 1.000000 -0.117570 0.871754 0.817941\n","Sepal Width -0.117570 1.000000 -0.428440 -0.366126\n","Petal Length 0.871754 -0.428440 1.000000 0.962865\n","Petal Width 0.817941 -0.366126 0.962865 1.000000"],"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Sepal LengthSepal WidthPetal LengthPetal Width
Sepal Length1.000000-0.1175700.8717540.817941
Sepal Width-0.1175701.000000-0.428440-0.366126
Petal Length0.871754-0.4284401.0000000.962865
Petal Width0.817941-0.3661260.9628651.000000
\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","
\n","
\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","summary":"{\n \"name\": \"# the correlation coefficients will be along the main diagonal?\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Sepal Length\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5127858813273581,\n \"min\": -0.11756978413300088,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.11756978413300088,\n 0.8179411262715758,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Sepal Width\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6657080809503223,\n \"min\": -0.42844010433053864,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.0,\n -0.3661259325364377,\n -0.11756978413300088\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Petal Length\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6887672414981271,\n \"min\": -0.42844010433053864,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.42844010433053864,\n 0.962865431402796,\n 0.8717537758865838\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Petal Width\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6512846518377995,\n \"min\": -0.3661259325364377,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.3661259325364377,\n 1.0,\n 0.8179411262715758\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":1}],
"source":["import pandas as pd\n","from sklearn.datasets import load_iris\n","from sklearn.linear_model import LinearRegression\n","from sklearn.model_selection import train_test_split\n","\n","# Load the iris measurements into a DataFrame with one named column per feature.\n","iris = load_iris()\n","X = pd.DataFrame(iris.data, columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'])\n","\n","# Display a 4 x 4 DataFrame of Pearson correlations between the row and column variables.\n","# What values of the correlation coefficients will be along the main diagonal?\n","X.corr(method=\"pearson\")"]},
{"cell_type":"code","source":["corr_feature = ['Petal Length', 'Petal Width']  # strongly correlated pair (see the correlation table)\n","target = X['Sepal Length']\n","\n","# Shared split settings: using the same values in both calls keeps the two splits comparable.\n","TEST_SIZE = 0.2\n","RANDOM_STATE = 42\n","\n","# Single predictor: Petal Length only.\n","feature_train, feature_test, target_train, target_test = train_test_split(\n","    X[corr_feature[:1]], target, test_size=TEST_SIZE, random_state=RANDOM_STATE\n",")\n","# Two correlated predictors: Petal Length and Petal Width.\n","corr_feature_train, corr_feature_test, corr_target_train, corr_target_test = train_test_split(\n","    X[corr_feature], target, test_size=TEST_SIZE, random_state=RANDOM_STATE\n",")"],"metadata":{"id":"M7aYK_razSfn","executionInfo":{"status":"ok","timestamp":1710341866343,"user_tz":240,"elapsed":157,"user":{"displayName":"Deniz Boloni-Turgut","userId":"05746704740663907513"}}},"execution_count":2,"outputs":[]},
{"cell_type":"code","source":["# One regressor per feature set; LinearRegression is imported in the first cell.\n","uncorr_model = LinearRegression()  # fit on Petal Length only\n","corr_model = LinearRegression()  # fit on Petal Length and Petal Width\n","\n","uncorr_model.fit(feature_train, target_train)\n","corr_model.fit(corr_feature_train, corr_target_train)"],"metadata":{"id":"1KA2zeQJze9D","colab":{"base_uri":"https://localhost:8080/","height":74},"executionInfo":{"status":"ok","timestamp":1710364028494,"user_tz":240,"elapsed":422,"user":{"displayName":"Deniz Boloni-Turgut","userId":"05746704740663907513"}},"outputId":"d87b5f0e-2ed1-4d59-b626-ae816b499c8a","collapsed":true},"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["LinearRegression()"],"text/html":["
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"]},"metadata":{},"execution_count":3}]},
{"cell_type":"code","source":["# Score the Petal Length-only model on its training rows, then on the held-out rows.\n","uncorr_train_score = uncorr_model.score(feature_train, target_train)\n","uncorr_test_score = uncorr_model.score(feature_test, target_test)\n","print('Uncorrelated training score: ', uncorr_train_score)\n","print('Uncorrelated testing score: ', uncorr_test_score)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mwepGrFy_XN0","executionInfo":{"status":"ok","timestamp":1710364069614,"user_tz":240,"elapsed":181,"user":{"displayName":"Deniz Boloni-Turgut","userId":"05746704740663907513"}},"outputId":"25ae54df-6f5d-4194-9f7f-47c16d238378","collapsed":true},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Uncorrelated training score: 0.7433457054705969\n","Uncorrelated testing score: 0.812980761507489\n"]}]},
{"cell_type":"code","source":["# Score the two-feature (Petal Length and Petal Width) model the same way.\n","corr_train_score = corr_model.score(corr_feature_train, corr_target_train)\n","corr_test_score = corr_model.score(corr_feature_test, corr_target_test)\n","print('Correlated training score: ', corr_train_score)\n","print('Correlated testing score: ', corr_test_score)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EY0Ovb6y_dQV","executionInfo":{"status":"ok","timestamp":1710364097333,"user_tz":240,"elapsed":153,"user":{"displayName":"Deniz Boloni-Turgut","userId":"05746704740663907513"}},"outputId":"ce5bb6fe-4969-4852-abaf-2598103c2648","collapsed":true},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["Correlated training score: 0.7543504893573446\n","Correlated testing score: 0.7972490195696222\n"]}]},
{"cell_type":"code","source":[],"metadata":{"id":"mky5o-bBvgUU"},"execution_count":null,"outputs":[]}]}