# Car Evaluation classification: decision tree (caret + rpart) and random
# forest.
#
# Steps: load the UCI Car Evaluation data, make a reproducible 70/30
# train/test split, fit each model on the training set, then print a
# confusion matrix and the accuracy on both the training and the test set.

# Dependencies ----
# install.packages(c("rpart", "caret", "e1071", "randomForest"))
library(rpart)
library(caret)
library(e1071)
library(randomForest)

# Load the dataset ----
# car.data has no header row. Reading it with header = TRUE would silently
# turn the first observation into column names (and drop it from the data),
# so the names are supplied explicitly instead.
# stringsAsFactors = TRUE is required on R >= 4.0, where the default became
# FALSE: every column is categorical, and randomForest() only performs
# classification when the response is a factor.
data1 <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data",
  header = FALSE,
  col.names = c(
    "buying", "maint", "doors", "persons", "lug_boot", "safety", "class"
  ),
  stringsAsFactors = TRUE
)
head(data1)
str(data1)
summary(data1)

# Split into train and test sets ----
# Training set : test set = 70 : 30, drawn at random without replacement.
# The seed makes the split reproducible; floor() makes the integer sample
# size explicit instead of relying on sample() truncating a fractional size.
set.seed(100)
train <- sample(nrow(data1), floor(0.7 * nrow(data1)), replace = FALSE)
TrainSet <- data1[train, ]
TestSet <- data1[-train, ]

# Decision tree (rpart, tuned through caret) ----
model_dt <- train(class ~ ., data = TrainSet, method = "rpart")

# Performance on the training set.
# predict() for caret models takes `newdata`; a misspelled `data =` argument
# would be swallowed by `...` and ignored.
out_train <- predict(model_dt, newdata = TrainSet)
table(out_train, TrainSet$class)
mean(out_train == TrainSet$class)

# Performance on the test set.
out_test <- predict(model_dt, newdata = TestSet)
table(out_test, TestSet$class)
mean(out_test == TestSet$class)

# Random forest ----
# Baseline model with default parameters; printing it shows the OOB error
# estimate and the OOB confusion matrix.
model1 <- randomForest(class ~ ., data = TrainSet, importance = TRUE)
model1

# Tuned model: fewer trees, and mtry = 6 means all six predictors are
# candidates at every split.
model2 <- randomForest(
  class ~ .,
  data = TrainSet,
  ntree = 100,
  mtry = 6,
  importance = TRUE
)
model2

# Predicting on the training set.
# NOTE: this scores the forest on the rows it was grown from, so the accuracy
# is optimistic; the OOB error printed above is the fairer estimate.
predTrain <- predict(model2, TrainSet, type = "class")
# Checking classification accuracy
table(predTrain, TrainSet$class)
mean(predTrain == TrainSet$class)

# Predicting on the test set.
predValid <- predict(model2, TestSet, type = "class")
# Checking classification accuracy
table(predValid, TestSet$class)
mean(predValid == TestSet$class)

# Variable importance and error-vs-number-of-trees plot ----
importance(model2)
varImpPlot(model2)
plot(model2)