A hands-on explanation of Gradient Boosting Regression

Introduction

Boosting & Adaptive Boosting vs Gradient Boosting

An Overview Of Adaptive Boosting

Gradient Boosting

Learning Rate

A hands-on example of Gradient Boosting Regression with Python & Scikit-Learn

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
# Build a DataFrame of the Boston housing features plus the target column 'y'.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — this snippet
# requires an older scikit-learn version.
boston = load_boston()
df = pd.DataFrame(boston['data'], columns=boston['feature_names'])
df['y'] = boston['target']
df.head(5)

# Separate features from the target and create 5 shuffled CV folds.
X, y = df.drop('y', axis=1), df['y']
kf = KFold(n_splits=5, random_state=42, shuffle=True)
for train_index, val_index in kf.split(X):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

# The initial model predicts the mean training target for every sample.
base = [y_train.mean()] * len(y_train)
learning_rate = 0.1

# Pseudo-residuals for squared-error loss are observed minus predicted.
# BUG FIX: the original computed base - y_train, which negates the residuals
# and disagrees with residuals_2/3/4 defined later in the article.
residuals_1 = y_train - base

# Baseline validation MSE of the constant mean prediction.
# BUG FIX: base[len(y_val)] indexed a single scalar; slice the first
# len(y_val) entries so both arguments have matching length.
mean_squared_error(y_val, base[:len(y_val)])
OUT:
71.92521322606885
# Fit the first tree to the pseudo-residuals of the base (mean) prediction.
# BUG FIX: the excerpt used dtree_1 without ever creating or fitting it;
# the fit step is restored here.
dtree_1 = DecisionTreeRegressor(random_state=42)
dtree_1.fit(X_train, y_train - base)

# Updated ensemble prediction: base prediction plus the shrunken tree output.
predictions_dtree_1 = base + learning_rate * dtree_1.predict(X_train)
mean_squared_error(y_train, predictions_dtree_1)
OUT:
70.90445609876541
# Second boosting round: the new tree learns whatever the current ensemble
# still gets wrong (its residuals).
residuals_2 = y_train - predictions_dtree_1
dtree_2 = DecisionTreeRegressor(random_state=42)
dtree_2.fit(X_train, residuals_2)

# Fold the shrunken predictions of the new tree into the running ensemble
# and measure the training error again.
predictions_dtree_2 = predictions_dtree_1 + learning_rate * dtree_2.predict(X_train)
mean_squared_error(y_train, predictions_dtree_2)
OUT:
57.43260944000001
# Third boosting round: fit a tree to the current residuals and add its
# shrunken output to the ensemble.
residuals_3 = y_train - predictions_dtree_2
dtree_3 = DecisionTreeRegressor(random_state=42)
dtree_3.fit(X_train, residuals_3)
predictions_dtree_3 = predictions_dtree_2 + learning_rate * dtree_3.predict(X_train)

# Fourth boosting round — same recipe, one more tree.
# BUG FIX: the original fused the predictions_dtree_3 assignment and the
# residuals_4 assignment onto one line; they are separate statements.
residuals_4 = y_train - predictions_dtree_3
dtree_4 = DecisionTreeRegressor(random_state=42)
dtree_4.fit(X_train, residuals_4)
predictions_dtree_4 = predictions_dtree_3 + learning_rate * dtree_4.predict(X_train)
mean_squared_error(y_train, predictions_dtree_4)
OUT:
43.90388561846081
y_pred = base[:101] + learning_rate * 
(dtree_1.predict(X_val)) +
(dtree_2.predict(X_val) * learning_rate) +
(dtree_3.predict(X_val) * learning_rate) +
(dtree_4.predict(X_val) * learning_rate)
mean_squared_error(y_train, y_pred)OUT:
42.32013345535233

Gradient Boosting with Scikit-Learn’s GradientBoostingRegressor

# Fit scikit-learn's reference implementation with the same squared-error
# loss and shrinkage used in the manual walk-through above.
# NOTE(review): loss='ls' was renamed to 'squared_error' in scikit-learn 1.0
# and removed in 1.2 — 'ls' matches the older version whose get_params()
# output is printed below.
gradient_booster = GradientBoostingRegressor(loss='ls',learning_rate=0.1)
gradient_booster.get_params()
OUT:
{'alpha': 0.9,
'ccp_alpha': 0.0,
'criterion': 'friedman_mse',
'init': None,
'learning_rate': 0.1,
'loss': 'ls',
'max_depth': 3,
'max_features': None,
'max_leaf_nodes': None,
'min_impurity_decrease': 0.0,
'min_impurity_split': None,
'min_samples_leaf': 1,
'min_samples_split': 2,
'min_weight_fraction_leaf': 0.0,
'n_estimators': 100,
'n_iter_no_change': None,
'presort': 'deprecated',
'random_state': None,
'subsample': 1.0,
'tol': 0.0001,
'validation_fraction': 0.1,
'verbose': 0,
'warm_start': False}
gradient_booster.fit(X_train,y_train)
gradient_booster.score(X_train,y_train)
OUT:
0.9791009142174039
gradient_booster.score(X_val,y_val)
OUT:
0.8847454683496595
gradient_booster = GradientBoostingRegressor(loss='ls',learning_rate=0.25)
gradient_booster.fit(X_train,y_train)
gradient_booster.score(X_train,y_train)
OUT:
0.994857818295815
gradient_booster.score(X_val,y_val)
OUT:
0.9082261292781879
# Evaluate the fitted model on the held-out validation fold: predict, then
# compute the mean squared error against the true validation targets.
predictions = gradient_booster.predict(X_val)
mean_squared_error(y_val,predictions)
OUT:
6.599129139886324

A High School student with a passion for AI

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store