42.73. Lasso and Ridge Regression#

import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

from sklearn.metrics import r2_score

42.73.1. Common Regression class#

Most of the implementation is shared between Lasso and Ridge Regression. The only difference between the two models is which regularization term they use (the corresponding cost functions are shown after the list below):

  • L1 regularization —> Lasso Regression

  • L2 regularization —> Ridge Regression
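
For reference, these are the cost functions minimized below; they match the cost_function method and the regularization classes implemented in this chapter ($m$ is the number of training samples and $\lambda$ is the regularization factor, called lamda in the code):

$$
J_{\text{lasso}}(w) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}_i - y_i)^2 + \lambda\sum_{j}|w_j|,
\qquad
J_{\text{ridge}}(w) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}_i - y_i)^2 + \lambda\sum_{j}w_j^2
$$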

class Regression:
    def __init__(self, learning_rate, iteration, regularization):
        """
        :param learning_rate: A small value needed for gradient descent, default value is 0.1.
        :param iteration: Number of training iterations, default value is 10,000.
        :param regularization: The L1/L2 regularization object used by the regression model.
        """
        self.m = None
        self.n = None
        self.w = None
        self.b = None
        self.regularization = regularization # will be the L1/L2 regularization class according to the regression model.
        self.lr = learning_rate
        self.it = iteration

    def cost_function(self, y, y_pred):
        """
        :param y: Original target value.
        :param y_pred: predicted target value.
        """
        return (1 / (2*self.m)) * np.sum(np.square(y_pred - y)) + self.regularization(self.w)
    
    def hypothesis(self, weights, bias, X):
        """
        :param weights: parameter values (weights).
        :param bias: bias term (unused: the bias is folded into the weights via the column of ones added to X).
        :param X: Training samples.
        """
        return np.dot(X, weights)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N dimensional matrix.
        :param y: training data target values -----> column vector of shape (m, 1).
        """
        # Insert constant ones for bias weights.
        X = np.insert(X, 0, 1, axis=1)

        # The target should be a column vector of shape (m, 1), not a 1-D array of shape (m,).
        # This check reports an error and stops if the shape is wrong.
        try:
            y.shape[1]
        except IndexError:
            print("ERROR: The target should be a column vector of shape ({shape_y_0}, 1), "
                  "but an array of shape {shape_y} was passed."
                  .format(shape_y_0=y.shape[0], shape_y=y.shape))
            return
        
        # m is the number of training samples.
        self.m = X.shape[0]
        # n is the number of features.
        self.n = X.shape[1]

        # Set the initial weight.
        self.w = np.zeros((self.n , 1))

        # bias.
        self.b = 0

        for it in range(1, self.it+1):
            # 1. Find the predicted value through the hypothesis.
            # 2. Find the cost function value.
            # 3. Find the derivative of the weights.
            # 4. Apply gradient descent.
            y_pred = self.hypothesis(self.w, self.b, X)
            cost = self.cost_function(y, y_pred)
            # Find the derivative of the cost with respect to the weights.
            dw = (1/self.m) * np.dot(X.T, (y_pred - y)) + self.regularization.derivation(self.w)
            # Update the weight parameters.
            self.w = self.w - self.lr * dw

            if it % 10 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))

    def predict(self, test_X):
        """
        :param test_X: feature values to predict.
        """
        # Insert constant ones for bias weights.
        test_X = np.insert(test_X, 0, 1, axis=1)

        y_pred = self.hypothesis(self.w, self.b, test_X)
        return y_pred
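
Putting the pieces together, each iteration of train performs the standard batch gradient-descent update (learning rate $\alpha$, regularization term $R(w)$ given by the classes defined next):

$$
w \leftarrow w - \alpha \left( \frac{1}{m} X^\top (\hat{y} - y) + \frac{\partial R(w)}{\partial w} \right)
$$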

42.73.2. Regularization classes#

# Create the regularization class we want.
class L1Regularization:
    """Regularization used for Lasso Regression."""
    def __init__(self, lamda):
        self.lamda = lamda

    def __call__(self, weights):
        "This is returned when we call this class."
        return self.lamda * np.sum(np.abs(weights))

    def derivation(self, weights):
        "Derivative of the regularization function."
        return self.lamda * np.sign(weights)


class L2Regularization:
    """Regularization used for Ridge Regression."""
    def __init__(self, lamda):
        self.lamda = lamda

    def __call__(self, weights):
        "This is returned when we call this class."
        return self.lamda * np.sum(np.square(weights))

    def derivation(self, weights):
        "Derivative of the regularization function."
        return self.lamda * 2 * weights
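
As a quick sanity check, the penalty and its derivative can be evaluated directly on a small, made-up weight vector (w_demo and the lamda value below are illustrative only; the classes above are assumed to be in scope):

# Hypothetical weights, just to illustrate the penalty and its derivative.
w_demo = np.array([[0.5], [-2.0], [0.0]])

l1 = L1Regularization(lamda=0.1)
l2 = L2Regularization(lamda=0.1)

print(l1(w_demo), l1.derivation(w_demo).ravel())   # 0.1 * (0.5 + 2.0 + 0.0) = 0.25 and 0.1 * sign(w)
print(l2(w_demo), l2.derivation(w_demo).ravel())   # 0.1 * (0.25 + 4.0 + 0.0) = 0.425 and 0.2 * w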

42.73.3. Data creation#

# Define the training data.
X, y = make_regression(n_samples=50000, n_features=8)

# Reshape the target into a column vector of shape (n, 1).
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))
====================================================================================================
Number of training data samples-----> 50000
Number of training features --------> 8
Shape of the target value ----------> (50000, 1)
# display the data.
data = pd.DataFrame(X)
data.head()
          0         1         2         3         4         5         6         7
0 -0.121669 -0.148495  0.617423 -1.662224  0.536879 -0.681744 -0.885964  0.869547
1  0.078910 -0.534563 -0.258445  2.274082  2.007185  2.304888 -0.477328  0.173211
2  1.492209  0.326460  0.724924  0.546021 -0.856196  0.292669  0.022331 -0.336342
3 -0.057504 -0.829756  1.446873 -0.206937  0.741066  0.265056  0.148822 -1.136302
4  0.941642 -0.523965 -0.218161 -0.703982 -0.222337  0.464776 -1.604384  0.461822
# display the data.
data_y = pd.DataFrame(y)
data_y.head()
            0
0  -52.114639
1  195.157729
2  145.868942
3  -86.035711
4   32.734924

42.73.4. Lasso Regression from scratch#

class LassoRegression(Regression):
    """
    Lasso Regression is one of the variance of the Linear Regression. This model doing the parameter learning 
    and regularization at the same time. This model uses the L1Regularization. 
    * Regularization will be one of the soluions to the Overfitting.
    * Overfitting happens when the model has "High Variance and low bias". So, regularization adds a little bias to the model.
    * This model will try to keep the balance between learning the parameters and the complexity of the model( tries to keep the parameter having small value and small degree of palinamial).
    * The Regularization parameter(lamda) controls how severe  the regularization is. 
    * large lamda adds more bias , hence the Variance will go very small --> this may cause underfitting(Low bias and High Varinace).
    * Lamda can be found by tial and error methos. 
    """
    def __init__(self, lamda, learning_rate, iteration):
        """
        Define the hyperparameters we are going to use in this model.
        :param lamda: Regularization factor.
        :param learning_rate: A small value needed for gradient descent, default value is 0.1.
        :param iteration: Number of training iterations, default value is 10,000.
        """
        self.regularization = L1Regularization(lamda)
        super(LassoRegression, self).__init__(learning_rate, iteration, self.regularization)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N dimensional matrix.
        :param y: training data target values -----> column vector of shape (m, 1).
        """
        return super(LassoRegression, self).train(X, y)

    def predict(self, test_X):
        """
        :param test_X: values to predict.
        """
        return super(LassoRegression, self).predict(test_X)
# define the parameters
param = {
    "lamda" : 0.1,
    "learning_rate" : 0.1,
    "iteration" : 100
}
print("="*100)
linear_reg = LassoRegression(**param)

# Train the model.
linear_reg.train(X, y) 

# Predict the values.
y_pred = linear_reg.predict(X)

# R2 score of the model.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)
====================================================================================================
The Cost function for the iteration 10----->1790.7949491493525 :)
The Cost function for the iteration 20----->249.1630251618197 :)
The Cost function for the iteration 30----->62.47992614761185 :)
The Cost function for the iteration 40----->39.84797838907741 :)
The Cost function for the iteration 50----->37.10119387961131 :)
The Cost function for the iteration 60----->36.76731980371954 :)
The Cost function for the iteration 70----->36.72663483920857 :)
The Cost function for the iteration 80----->36.72164454424515 :)
The Cost function for the iteration 90----->36.72101958323096 :)
The Cost function for the iteration 100----->36.72093598500494 :)
The r2_score of the trained model 0.9999965334550955
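
To see the effect of the L1 penalty, you can also inspect the learned parameters directly. This sketch assumes the linear_reg model trained above; its w attribute stores the bias as the first entry, because of the column of ones inserted in train:

# Learned parameters of the from-scratch Lasso model.
# The first entry of w corresponds to the bias term, the rest are the feature weights.
print("bias   :", linear_reg.w[0, 0])
print("weights:", linear_reg.w[1:].ravel())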

42.73.5. Lasso Regression using scikit-learn#

from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score

# data is already defined, going to use the same data for comparison.
print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
====================================================================================================
Number of training data samples-----> 50000
Number of training features --------> 8
lasso_sklearn = Lasso()
lasso_sklearn.fit(X, y)

# predict the value
y_pred_sklearn = lasso_sklearn.predict(X)
score = r2_score(y, y_pred_sklearn)
print("="*100)
print("R2 score of the model is {}".format(score))
====================================================================================================
R2 score of the model is 0.9996615456176722
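
The learned coefficients of the two models can also be compared directly. This is a sketch assuming the linear_reg and lasso_sklearn objects from the cells above; note that the two models use different regularization strengths (lamda = 0.1 from scratch versus scikit-learn's default alpha = 1.0), so the coefficients will not match exactly:

# Compare the fitted coefficients (excluding the bias/intercept).
print("from scratch :", linear_reg.w[1:].ravel())
print("scikit-learn :", lasso_sklearn.coef_)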

42.73.6. Conclusion#

Our from-scratch model works almost as well as the scikit-learn model: both give an r2_score of about 0.999, which is good.

42.73.7. Ridge Regression from scratch#

class RidgeRegression(Regression):
    """
    Ridge Regression is one of the variance of the Linear Regression. This model doing the parameter learning 
    and regularization at the same time. This model uses the L2Regularization. 
    This is very similar to the Lasso regression.
    * Regularization will be one of the soluions to the Overfitting.
    * Overfitting happens when the model has "High Variance and low bias". So, regularization adds a little bias to the model.
    * This model will try to keep the balance between learning the parameters and the complexity of the model( tries to keep the parameter having small value and small degree of palinamial).
    * The Regularization parameter(lamda) controls how severe  the regularization is. 
    * large lamda adds more bias , hence the Variance will go very small --> this may cause underfitting(Low bias and High Varinace).
    * Lamda can be found by tial and error methos. 
    """
    def __init__(self, lamda, learning_rate, iteration):
        """
        Define the hyperparameters we are going to use in this model.
        :param lamda: Regularization factor.
        :param learning_rate: A small value needed for gradient descent, default value is 0.1.
        :param iteration: Number of training iterations, default value is 10,000.
        """
        self.regularization = L2Regularization(lamda)
        super(RidgeRegression, self).__init__(learning_rate, iteration, self.regularization)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N dimensional matrix.
        :param y: training data target values -----> column vector of shape (m, 1).
        """
        return super(RidgeRegression, self).train(X, y)

    def predict(self, test_X):
        """
        :param test_X: values to predict.
        """
        return super(RidgeRegression, self).predict(test_X)
# define the parameters
param = {
    "lamda" : 0.1,
    "learning_rate" : 0.1,
    "iteration" : 100
}
print("="*100)
linear_reg = RidgeRegression(**param)

# Train the model.
linear_reg.train(X, y) 

# Predict the values.
y_pred = linear_reg.predict(X)

# R2 score of the model.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)
====================================================================================================
The Cost function for the iteration 10----->2938.4966309980255 :)
The Cost function for the iteration 20----->2035.1369099078838 :)
The Cost function for the iteration 30----->1965.3537240779558 :)
The Cost function for the iteration 40----->1959.9564565636458 :)
The Cost function for the iteration 50----->1959.538491832234 :)
The Cost function for the iteration 60----->1959.5060835072245 :)
The Cost function for the iteration 70----->1959.5035673795078 :)
The Cost function for the iteration 80----->1959.503371776776 :)
The Cost function for the iteration 90----->1959.5033565506537 :)
The Cost function for the iteration 100----->1959.503355363845 :)
The r2_score of the trained model 0.9723201967872726
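
The from-scratch Ridge model scores lower (about 0.972) than the Lasso model above because the L2 penalty with lamda = 0.1 shrinks all the weights toward zero, which biases the predictions on this noise-free synthetic data. Below is a sketch of an experiment you could run with the same API, just a smaller regularization factor (output not shown):

# Hypothetical experiment: a smaller lamda shrinks the weights less,
# which should bring the training r2_score closer to 1 on this synthetic data.
param_small = {
    "lamda" : 0.001,
    "learning_rate" : 0.1,
    "iteration" : 100
}
ridge_small = RidgeRegression(**param_small)
ridge_small.train(X, y)
print("r2_score with lamda=0.001:", r2_score(y, ridge_small.predict(X)))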

42.73.8. Ridge Regression using scikit-learn#

from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

# data is already defined, going to use the same data for comparison.
print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
====================================================================================================
Number of training data samples-----> 50000
Number of training features --------> 8
ridge_sklearn = Ridge()
ridge_sklearn.fit(X, y)

# predict the value
y_pred_sklearn = ridge_sklearn.predict(X)
score = r2_score(y, y_pred_sklearn)
print("="*100)
print("R2 score of the model is {}".format(score))
====================================================================================================
R2 score of the model is 0.999999999601675

42.73.9. Acknowledgments#

Thanks to Pavithra Devi M for creating the notebook Lasso and Ridge Regression from scratch, licensed under Apache 2.0. It inspires the majority of the content in this chapter.