Machine Learning overview - assignment 2
Contents
42.43. Machine Learning overview - assignment 2#
42.43.1. Digit Recognizer#
Although this is a computer vision problem, this notebook uses a simple K-Nearest Neighbors model as a good starting point. We use GridSearchCV to fine-tune hyperparameters such as "n_neighbors" and "weights". Furthermore, we use Data Augmentation (also called Artificial Data Synthesis) to boost the model's performance on the test set.
%matplotlib inline
import numpy as np # Linear algebra
import pandas as pd # For data manipulation
import json
import os
import matplotlib.pyplot as plt # For visualization
from sklearn.neighbors import KNeighborsClassifier # For modelling
from sklearn.model_selection import cross_val_score, GridSearchCV # For evaluation and hyperparameter tuning
from sklearn.metrics import confusion_matrix, classification_report # For evaluation
from scipy.ndimage import shift, rotate, zoom # For data augmentation
Peeking at the data
Loading the datasets into dataframes
# Download the MNIST train and test CSV files directly into DataFrames.
train_df = pd.read_csv("https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/mnist_train.csv")
test_df = pd.read_csv("https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/mnist_test.csv")
Knowing about the features in the datasets
# Print a summary of each DataFrame to inspect what was loaded.
train_df.info()
test_df.info()
Converting the train and test dataframes into numpy arrays
# Keep only a subset of the data: the first 6000 training rows and the
# first 1000 test rows. Column 0 is taken as the label and every remaining
# column as a feature.
X_train = train_df.iloc[:6000, 1:].to_numpy()
y_train = train_df.iloc[:6000, 0].to_numpy()
X_test = test_df.iloc[:1000, 1:].to_numpy()
y_test = test_df.iloc[:1000, 0].to_numpy()

# Report the array shapes, one per line.
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)
Visualizing a digit from the training data as a 28 X 28 image
# Display one training sample as a 28 x 28 image together with its label.
# A single index is used for both the pixels and the label so that the
# printed label always belongs to the image being shown.
some_digit_index = 46
some_digit = X_train[some_digit_index]
some_digit_image = some_digit.reshape(28, 28)
print(f"Label: {y_train[some_digit_index]}")
plt.imshow(some_digit_image, cmap="binary")
plt.show()
Train Model
# Baseline: a k-nearest-neighbours classifier left at its default
# hyperparameters, fitted on the training subset and scored on the test subset.
estimator = KNeighborsClassifier().fit(X_train, y_train)
predictions = estimator.predict(X_test)

# Per-class metrics first, then the confusion matrix.
print(classification_report(y_true=y_test, y_pred=predictions, digits=3), end="\n\n")
print(confusion_matrix(y_true=y_test, y_pred=predictions), end="\n\n")
Fine-tuning the model by finding the best values for the hyperparameters (weights, n_neighbors) using GridSearchCV
# Candidate hyperparameter values for the search. "weights" lists a single
# candidate, so only "n_neighbors" actually varies between the fits.
grid_params = dict(weights=['distance'], n_neighbors=[3, 5, 7, 9, 11])

estimator = KNeighborsClassifier()
grid_estimator = GridSearchCV(
    estimator=estimator,      # Base estimator
    param_grid=grid_params,   # Parameters to tune
    verbose=2,                # Verbosity of the logs
    n_jobs=-1,                # -1 runs the jobs on all the processors
)

# Run the search on the training data.
grid_estimator.fit(X_train, y_train)

print(f"Best Score: {grid_estimator.best_score_}", end="\n\n")
print(f"Best Parameters: \n{json.dumps(grid_estimator.best_params_, indent=4)}",
      end="\n\n")

# Tabulate the per-candidate results; the trailing bare expression makes the
# notebook render the table.
print("Grid Search CV results:")
results_df = pd.DataFrame(grid_estimator.cv_results_)
results_df
Best parameter values found: {n_neighbors: 3, weights: "distance"}
Fitting a new model with the found hyperparameter values to the training data and making predictions on the test data
# Refit a classifier with the hyperparameter values selected by the grid
# search and evaluate it on the test subset.
estimator = KNeighborsClassifier(n_neighbors=3, weights='distance')
estimator.fit(X_train, y_train)
predictions = estimator.predict(X_test)

# The confusion matrix is not the last expression of the cell, so it must be
# printed explicitly; otherwise it is computed and silently discarded.
print(confusion_matrix(y_test, predictions), end="\n\n")
print(classification_report(y_test, predictions, digits=3), end="\n\n")
Data Augmentation
Each image in the training set is
shifted down, up, left and right by one pixel
rotated clockwise and anti-clockwise
clipped and zoomed at two different ranges
generating eight different images. The image is clipped before zooming to preserve the image size.
def shift_in_one_direction(image, direction):
    """
    Shifts an image by one pixel in the specified direction

    Parameters
    ----------
    image : array_like
        Two-dimensional image indexed as (row, column).
    direction : str
        One of "DOWN", "UP", "LEFT" or "RIGHT".

    Returns
    -------
    numpy.ndarray
        The shifted image as returned by ``scipy.ndimage.shift`` with its
        default interpolation and boundary settings.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported names. Unknown
        values used to be treated as "RIGHT", which hid typos.
    """
    # (row offset, column offset) for every supported direction.
    offsets = {
        "DOWN": (1, 0),
        "UP": (-1, 0),
        "LEFT": (0, -1),
        "RIGHT": (0, 1),
    }
    try:
        offset = offsets[direction]
    except KeyError:
        raise ValueError(
            f"Unknown direction {direction!r}; expected one of {sorted(offsets)}"
        ) from None
    return shift(image, offset)
def shift_in_all_directions(image):
    """
    Produce four copies of an image, each moved by one pixel.

    ``image`` is reshaped to 28 x 28 first. The result is a 4-tuple ordered
    as (down, up, left, right).
    """
    grid = image.reshape(28, 28)
    return tuple(
        shift_in_one_direction(grid, direction)
        for direction in ("DOWN", "UP", "LEFT", "RIGHT")
    )
def rotate_in_all_directions(image, angle):
    """
    Rotate an image by ``angle`` and by ``-angle`` degrees.

    ``image`` is reshaped to 28 x 28 first. Both rotations keep that size
    (``reshape=False``). The result is a 2-tuple ordered as
    (rotated by ``angle``, rotated by ``-angle``).
    """
    grid = image.reshape(28, 28)
    return tuple(
        rotate(grid, signed_angle, reshape=False)
        for signed_angle in (angle, -angle)
    )
def clipped_zoom(image, zoom_ranges):
    """
    Clips and zooms an image at the specified zooming ranges

    For every factor the centre of the 28 x 28 image is cropped to roughly
    ``28 / factor`` pixels per side and the crop is resampled back to
    28 x 28, so every returned image has the size of the input image.

    Parameters
    ----------
    image : numpy.ndarray
        Image with 784 values; it is reshaped to 28 x 28.
    zoom_ranges : iterable of float
        Zoom-in factors. Each factor must yield a non-empty crop that is
        no larger than the image itself.

    Returns
    -------
    list of numpy.ndarray
        One 28 x 28 image per entry of ``zoom_ranges``, in the same order.

    Raises
    ------
    ValueError
        If a factor would make the crop empty or larger than the image.
    """
    reshaped_image = image.reshape(28, 28)
    h, w = reshaped_image.shape
    zoomed_images = []
    for zoom_range in zoom_ranges:
        # Size of the central crop that will be blown up to h x w.
        zh = int(np.round(h / zoom_range))
        zw = int(np.round(w / zoom_range))
        if not (1 <= zh <= h and 1 <= zw <= w):
            raise ValueError(
                f"zoom range {zoom_range!r} gives an invalid {zh}x{zw} crop "
                f"for a {h}x{w} image"
            )
        top = (h - zh) // 2
        left = (w - zw) // 2
        crop = reshaped_image[top:top + zh, left:left + zw]
        # Zoom by the exact ratio between image and crop rather than by
        # zoom_range: because zh/zw are rounded, zooming by zoom_range can
        # give an output that is one pixel off (e.g. 29 x 29 for 1.3).
        zoomed_images.append(zoom(crop, (h / zh, w / zw)))
    return zoomed_images
def alter_image(image):
    """
    Build the augmented variants of a single image.

    The result stacks, along the first axis, the four one-pixel shifts, the
    two rotations by 10 degrees (one per sign) and the two clipped zooms
    (factors 1.1 and 1.2), in that order.
    """
    variants = (
        shift_in_all_directions(image),
        rotate_in_all_directions(image, 10),
        clipped_zoom(image, [1.1, 1.2]),
    )
    # Each group becomes an (n, 28, 28) array; join the groups on axis 0.
    return np.concatenate(variants, axis=0)
# Run alter_image on every row of X_train and flatten each generated image
# back to a 784-value row, so the variants of one sample sit on consecutive rows.
X_train_add = np.apply_along_axis(func1d=alter_image, axis=1, arr=X_train).reshape(-1, 784)

# alter_image yields 8 variants per sample, so every label is repeated 8
# times in a row to stay aligned with X_train_add.
y_train_add = np.repeat(y_train, repeats=8)

print(f"X_train_add shape: {X_train_add.shape}")
print(f"y_train_add shape: {y_train_add.shape}")
Combining the synthesized data with the actual training data
# Append the synthesized samples and labels after the original ones.
X_train_combined = np.concatenate((X_train, X_train_add), axis=0)
y_train_combined = np.concatenate((y_train, y_train_add), axis=0)

# Drop the names of the separate arrays; only the combined ones are used below.
del X_train, X_train_add, y_train, y_train_add

print(f"X_train_combined shape: {X_train_combined.shape}")
print(f"y_train_combined shape: {y_train_combined.shape}")
Fitting a new model with the tuned hyperparameters to the combined dataset
# Train a classifier with the tuned hyperparameters on the combined
# (original + augmented) training data and evaluate it on the test subset.
cdata_estimator = KNeighborsClassifier(n_neighbors=3, weights='distance')
cdata_estimator.fit(X_train_combined, y_train_combined)
cdata_estimator_predictions = cdata_estimator.predict(X_test)

# The confusion matrix is not the last expression of the cell, so it must be
# printed explicitly; otherwise it is computed and silently discarded.
print(confusion_matrix(y_test, cdata_estimator_predictions), end="\n\n")
print(classification_report(y_test, cdata_estimator_predictions, digits=3), end="\n\n")
Note: With Data Augmentation the accuracy jumped from 91.6% to 95.3% on the test data.
42.43.2. Acknowledgments#
Thanks to SkalskiP for creating the open-source Kaggle Jupyter notebook, licensed under Apache 2.0. It inspired the majority of the content of this assignment.