set.seed(123) # seed for reproducibility
library(glmnet) # for lasso, ridge and elastic-net regression
## load data
data("mtcars")
X <- as.matrix(mtcars[,which(names(mtcars)!="mpg")])
## response: raw mpg values (y could optionally be standardized with scale())
y <- mtcars[,"mpg"]
## select training and test samples
n <- nrow(X)
train_rows <- sample(1:n, floor(0.66 * n)) ## 66% training, 34% test
X.train <- X[train_rows, ]
X.test <- X[-train_rows, ]
y.train <- y[train_rows]
y.test <- y[-train_rows]
# See how increasing lambda shrinks the coefficients --------------------------
# Setting alpha = 1 implements lasso regression
lasso.model <- glmnet(X.train, y.train, alpha = 1, standardize = TRUE)
plot(lasso.model, xvar = "lambda")
legend("bottomright", lwd = 1, col = 1:ncol(X.train), legend = colnames(X), cex = 1)
# Perform 10-fold cross-validation to select lambda
lasso_cv <- cv.glmnet(X.train, y.train, alpha = 1, type.measure = "mse",
                      standardize = TRUE, nfolds = 10, family = "gaussian")
# Plot cross-validation results
plot(lasso_cv)
# Select best cross-validated lambda
lambda_cv <- lasso_cv$lambda.min
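As a side note, cv.glmnet also reports lambda.1se, the largest λ whose cross-validated error is within one standard error of the minimum; some people prefer it as a more conservative (more heavily penalized) choice. A minimal check, just printing both values:
# Compare the two candidate penalties returned by cv.glmnet
lambda_cv            # lambda.min: minimizes cross-validated MSE
lasso_cv$lambda.1se  # lambda.1se: more conservative alternative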
# Fit final lasso model at the selected lambda and evaluate it on the test set
## Note: some people skip the cross-validation step above and simply supply a fixed value for lambda, e.g. lambda = 0.001
lasso.model <- glmnet(X.train, y.train, lambda = lambda_cv, alpha = 1, standardize = TRUE)
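It is worth inspecting the fitted coefficients at this point: at the cross-validated λ the lasso typically sets some of them exactly to zero, which is its built-in variable selection. A one-line check (not part of the original workflow):
# Nonzero rows are the predictors the lasso kept; "." entries were shrunk to exactly zero
coef(lasso.model)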
# Make prediction
y.test_hat <- as.numeric(predict(lasso.model, newx = X.test)) # drop to a plain vector
Rsquare <- cor(y.test, y.test_hat)^2
MSE <- mean((y.test - y.test_hat)^2)
plot(y.test, y.test_hat, col = "#00000050", pch = 19, main = "Test result for lasso")
abline(lm(y.test_hat ~ y.test), lty = 2, lwd = 2, col = "gray")
text(x = 12, y = 20, labels = paste0("MSE = ", round(MSE, 4), "\n", "R-squared = ", round(Rsquare, 4)))
# See how increasing lambda shrinks the coefficients --------------------------
# Setting alpha = 0 implements ridge regression
ridge.model <- glmnet(X.train, y.train, alpha = 0, standardize = TRUE)
plot(ridge.model, xvar = "lambda",main = "Ridge")
legend("bottomright", lwd = 1, col = 1:ncol(X.train), legend = colnames(X), cex = 1)
# Perform 10-fold cross-validation to select lambda
ridge_cv <- cv.glmnet(X.train, y.train, alpha = 0, type.measure = "mse",
                      standardize = TRUE, nfolds = 10, family = "gaussian")
# Plot cross-validation results
plot(ridge_cv,main = "Ridge")
# Select best cross-validated lambda
lambda_cv <- ridge_cv$lambda.min
# Fit final ridge model at the selected lambda and evaluate it on the test set
## Note: some people skip the cross-validation step above and simply supply a fixed value for lambda, e.g. lambda = 0.001
ridge.model <- glmnet(X.train, y.train, lambda = lambda_cv, alpha = 0, standardize = TRUE)
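For contrast with the lasso fit above, the ridge coefficients can be inspected the same way; ridge shrinks them toward zero but never to exactly zero, so all predictors stay in the model.
# All coefficients remain nonzero under ridge, only shrunken
coef(ridge.model)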
# Make prediction
y.test_hat <- as.numeric(predict(ridge.model, newx = X.test)) # drop to a plain vector
Rsquare <- cor(y.test, y.test_hat)^2
MSE <- mean((y.test - y.test_hat)^2)
plot(y.test, y.test_hat, col = "#00000050", pch = 19, main = "Test result for ridge")
abline(lm(y.test_hat ~ y.test), lty = 2, lwd = 2, col = "gray")
text(x = 12, y = 20, labels = paste0("MSE = ", round(MSE, 4), "\n", "R-squared = ", round(Rsquare, 4)))
There are two parameters to tune: λ and α. The glmnet package lets us tune λ via cross-validation (cv.glmnet) for a fixed α, but it does not tune α itself, so we turn to caret for that job.
dat <- data.frame(y.train = y.train, X.train) # caret's formula interface expects a data frame, not a matrix
head(dat)
library(caret)
# Set training control
train_control <- trainControl(method = "repeatedcv",
                              number = 5,
                              repeats = 5,
                              search = "random",
                              verboseIter = FALSE)
# Train the model
elastic_net_model <- train(y.train ~ .,
                           data = dat,
                           method = "glmnet",
                           preProcess = c("center", "scale"),
                           tuneLength = 25,
                           trControl = train_control)
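Before predicting, it can be useful to see which (α, λ) pair the random search settled on; caret stores it in the bestTune element of the fitted object.
# Best (alpha, lambda) combination found by the random search
elastic_net_model$bestTune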
# Check results
y.test_hat <- predict(elastic_net_model, newdata = data.frame(X.test)) # newdata as a data frame to match the formula interface
Rsquare <- cor(y.test, y.test_hat)^2
MSE <- mean((y.test - y.test_hat)^2)
plot(y.test, y.test_hat, col = "#00000050", pch = 19, main = "Test result for ENet")
abline(lm(y.test_hat ~ y.test), lty = 2, lwd = 2, col = "gray")
text(x = 12, y = 20, labels = paste0("MSE = ", round(MSE, 4), "\n", "R-squared = ", round(Rsquare, 4)))