{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Boston housing: bagged linear regression vs. random forest\n",
    "\n",
    "Fits a bagged `LinearRegression` and a `RandomForestRegressor` to the Boston housing data and compares their errors on a held-out test set. The forest's `max_depth` is then tuned with a 3-fold grid search.\n",
    "\n",
    "**Note:** `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so this notebook needs scikit-learn < 1.2.\n",
    "\n",
    "Outputs are cleared; use *Restart Kernel -> Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SPLIT_SEED = 42  # seed for the train/test split\n",
    "MODEL_SEED = 0   # seed for the bagging and random-forest estimators"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset once and reuse the bunch for features and target.\n",
    "boston = load_boston()\n",
    "data = pd.DataFrame(boston.data, columns=boston.feature_names)\n",
    "# A 1-D Series (not a one-column DataFrame) avoids DataConversionWarning in fit().\n",
    "target = pd.Series(boston.target, name='target')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split, then scale\n",
    "\n",
    "The split happens first. The scaler is fitted on the training rows only and then applied to the test rows, so no test-set statistics leak into preprocessing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data_raw, test_data_raw, train_target, test_target = train_test_split(\n",
    "    data, target, random_state=SPLIT_SEED\n",
    ")\n",
    "assert len(train_data_raw) + len(test_data_raw) == len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One scaler handles all columns at once; min/max come from the training rows only.\n",
    "scaler = MinMaxScaler().fit(train_data_raw)\n",
    "train_data = pd.DataFrame(\n",
    "    scaler.transform(train_data_raw), columns=data.columns, index=train_data_raw.index\n",
    ")\n",
    "test_data = pd.DataFrame(\n",
    "    scaler.transform(test_data_raw), columns=data.columns, index=test_data_raw.index\n",
    ")\n",
    "train_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def report_errors(model, features, y_true):\n",
    "    \"\"\"Score a fitted regressor on held-out data.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : fitted estimator exposing ``predict``.\n",
    "    features : feature matrix to predict on.\n",
    "    y_true : observed target values aligned with ``features``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.Series holding the mean absolute error, mean squared error and\n",
    "    median absolute error, in that order.\n",
    "    \"\"\"\n",
    "    # Predict once and reuse the result for every metric.\n",
    "    y_pred = model.predict(features)\n",
    "    # scikit-learn metrics take (y_true, y_pred), in that order.\n",
    "    return pd.Series(\n",
    "        [\n",
    "            mean_absolute_error(y_true, y_pred),\n",
    "            mean_squared_error(y_true, y_pred),\n",
    "            median_absolute_error(y_true, y_pred),\n",
    "        ],\n",
    "        index=['mean_absolute_error', 'mean_squared_error', 'median_absolute_error'],\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bagged linear regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The base estimator is passed positionally because its keyword name differs\n",
    "# between scikit-learn versions (`base_estimator` vs. `estimator`).\n",
    "bagging_reg = BaggingRegressor(LinearRegression(), random_state=MODEL_SEED)\n",
    "bagging_reg.fit(train_data, train_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "report_errors(bagging_reg, test_data, test_target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random forest (fixed depth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "shallow_forest = RandomForestRegressor(max_depth=2, random_state=MODEL_SEED,\n",
    "                                       n_estimators=100)\n",
    "shallow_forest.fit(train_data, train_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "report_errors(shallow_forest, test_data, test_target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tune `max_depth` with grid search\n",
    "\n",
    "3-fold cross-validation over `max_depth` in 1..9. No `scoring` is passed, so candidates are ranked by the estimator's own `score` method (R^2 for regressors)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_grid = {'max_depth': range(1, 10)}\n",
    "grid_search = GridSearchCV(\n",
    "    RandomForestRegressor(random_state=MODEL_SEED, n_estimators=100),\n",
    "    param_grid,\n",
    "    cv=3,\n",
    ")\n",
    "grid_search.fit(train_data, train_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# With the default refit=True the best candidate is already refitted on the\n",
    "# whole training set, so it is scored directly instead of copying max_depth by hand.\n",
    "report_errors(grid_search.best_estimator_, test_data, test_target)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}