import tensorflow as tf
import numpy as np
from pylab import imshow
import random
import cv2
import matplotlib.pyplot as plt
# --- Hyperparameters --------------------------------------------------------
h = 256                  # width of the hidden layer
lambda_ = 0.001          # coefficient of the L2 term added to each gradient
sigma = 0.01             # standard deviation used for random initialisation
learning_rate = 0.00001  # gradient-descent step size

# --- Data -------------------------------------------------------------------
# Each split is indexed below as [0] for the images and [1] for the labels.
train_data, val_data = tf.keras.datasets.mnist.load_data()

# Flatten every image into a single row, then divide by 255
# (presumably to bring 8-bit pixel values into [0, 1] -- see the Keras docs).
x_train = tf.reshape(train_data[0], (train_data[0].shape[0], -1)) / 255
x_val = tf.reshape(val_data[0], (val_data[0].shape[0], -1)) / 255

n = x_train.shape[0]           # number of training rows
q = x_train.shape[1]           # number of features per flattened row
d = len(set(train_data[1]))    # number of distinct labels in the training split

# One-hot encode the labels, one column per distinct training label.
y_train = tf.one_hot(train_data[1], depth=d)
y_val = tf.one_hot(val_data[1], depth=d)
def loss(Y_hat, Y):
    """Return the mean cross-entropy between predictions and targets.

    Args:
        Y_hat: Predicted probabilities, one row per sample.
        Y: Target weights with the same shape and dtype as ``Y_hat``
            (one-hot rows in this file).

    Returns:
        A Python ``float``: the negated sum of ``Y * log(Y_hat)`` over every
        entry, divided by the number of rows.
    """
    # multiply_no_nan returns 0 wherever Y is 0, even when log(Y_hat) is -inf.
    # A plain product would evaluate 0 * -inf = NaN as soon as any probability
    # reaches exactly 0, and that NaN would poison the whole sum.
    weighted_log_probs = tf.math.multiply_no_nan(tf.math.log(Y_hat), Y)
    total = tf.math.reduce_sum(weighted_log_probs)
    return -float(total / len(weighted_log_probs))
def classify(X, W1, b1, W2, b2):
    """Run the forward pass of the one-hidden-layer network.

    Args:
        X: Input rows.
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.

    Returns:
        A 4-tuple ``(Y_hat, Z, H, O)``: the softmax output, the hidden
        pre-activation, the hidden ReLU activation, and the output logits.
    """
    hidden_pre_activation = tf.linalg.matmul(X, W1) + b1
    hidden_activation = tf.nn.relu(hidden_pre_activation)
    logits = tf.linalg.matmul(hidden_activation, W2) + b2
    probabilities = tf.nn.softmax(logits)
    return probabilities, hidden_pre_activation, hidden_activation, logits
def train(X, Y, val_X, val_Y, epochs=1000):
    """Fit the one-hidden-layer classifier with full-batch gradient descent.

    Parameters are drawn from a normal distribution with standard deviation
    ``sigma`` and updated once per epoch using hand-derived gradients. Each
    gradient is summed over all rows of ``X`` and has ``lambda_ * parameter``
    added to it (biases included). The hidden width ``h``, ``sigma``,
    ``lambda_`` and ``learning_rate`` are read from module scope.

    Args:
        X: Training inputs, one row per sample.
        Y: Training targets, one row per sample and one column per class.
        val_X: Validation inputs, used only for the progress report.
        val_Y: Validation targets, used only for the progress report.
        epochs: Number of gradient-descent steps.

    Returns:
        The tuple ``(W1, b1, W2, b2)`` after the final update.
    """
    # Take the layer sizes from the arguments rather than from module globals,
    # so the function works for any X/Y pair with consistent shapes.
    n_features = X.shape[1]
    n_classes = Y.shape[1]

    W1 = tf.random.normal((n_features, h), 0, sigma)
    b1 = tf.random.normal((h,), 0, sigma)
    W2 = tf.random.normal((h, n_classes), 0, sigma)
    b2 = tf.random.normal((n_classes,), 0, sigma)

    # X never changes, so transpose it once instead of once per epoch.
    X_T = tf.transpose(X)

    for epoch in range(epochs):
        Y_hat, _, H, _ = classify(X, W1, b1, W2, b2)

        # Report every 100th epoch and the last one; the losses shown are
        # computed before this epoch's update and exclude the L2 term.
        if not epoch % 100 or epoch == epochs - 1:
            val_Y_hat, *_ = classify(val_X, W1, b1, W2, b2)
            print(f"Loss: {loss(Y_hat, Y)}, Validation Loss: {loss(val_Y_hat, val_Y)}")

        # Computing Partial Derivatives
        # ReLU derivative: 1 where the activation is non-zero, 0 elsewhere.
        dH_dZ = tf.cast(H != 0, tf.float32)
        # Derivative of softmax + cross-entropy with respect to the logits.
        dL_dO = Y_hat - Y
        dJ_dZ = tf.linalg.matmul(dL_dO, tf.transpose(W2)) * dH_dZ
        dJ_dW2 = tf.linalg.matmul(tf.transpose(H), dL_dO) + (lambda_ * W2)
        dJ_db2 = tf.math.reduce_sum(dL_dO, axis=0) + (lambda_ * b2)
        dJ_dW1 = tf.linalg.matmul(X_T, dJ_dZ) + (lambda_ * W1)
        dJ_db1 = tf.math.reduce_sum(dJ_dZ, axis=0) + (lambda_ * b1)

        # All gradients above use the pre-update parameters, so the order of
        # these four updates does not matter.
        W1 -= learning_rate * dJ_dW1
        b1 -= learning_rate * dJ_db1
        W2 -= learning_rate * dJ_dW2
        b2 -= learning_rate * dJ_db2

    return (W1, b1, W2, b2)
def accuracy(X, Y, val_X, val_Y, model):
    """Return the fraction of rows of ``X`` classified correctly.

    A row counts as correct when the arg-max of the model output equals the
    arg-max of the corresponding row of ``Y``.

    Args:
        X: Inputs to score.
        Y: Targets for ``X``, one column per class.
        val_X: Accepted for call compatibility; not used.
        val_Y: Accepted for call compatibility; not used.
        model: The tuple ``(W1, b1, W2, b2)`` returned by ``train``.

    Returns:
        The share of matching rows as a NumPy float.
    """
    W1, b1, W2, b2 = model
    Y_hat, *_ = classify(X, W1, b1, W2, b2)
    predicted_labels = tf.math.argmax(Y_hat, axis=1)
    expected_labels = tf.math.argmax(Y, axis=1)
    # The mean of a boolean array is the proportion of True entries; this
    # replaces an element-by-element Python-level sum.
    return np.mean(np.asarray(predicted_labels == expected_labels))
def predict(model, img):
    """Display ``img`` and print the class the model assigns to it.

    Args:
        model: The tuple ``(W1, b1, W2, b2)`` of network parameters.
        img: A single image; it is flattened and divided by 255 before being
            passed through the network.

    Returns:
        None. The image is shown with matplotlib and the predicted class
        index is printed.
    """
    W1, b1, W2, b2 = model

    # Build a one-row batch directly: flatten the image and divide by 255.
    single_row_batch = tf.reshape(img, (1, -1)) / 255
    probabilities = classify(single_row_batch, W1, b1, W2, b2)[0]
    prediction = tf.math.argmax(probabilities[0])

    imshow(img)
    plt.show()
    print(f"Prediction: {prediction}")
# Train on the training split; the validation split is used only for the
# periodic loss report printed by train().
final_model = train(x_train, y_train, x_val, y_val, epochs=1000)
# Output recorded from a previous run:
#   Loss: 2.3020310401916504, Validation Loss: 2.3020031452178955
#   Loss: 0.3015304505825043, Validation Loss: 0.2890055179595947
#   Loss: 0.22133658826351166, Validation Loss: 0.21477515995502472
#   Loss: 0.17719019949436188, Validation Loss: 0.17488066852092743
#   Loss: 0.14817407727241516, Validation Loss: 0.1492069959640503
#   Loss: 0.12714974582195282, Validation Loss: 0.13153091073036194
#   Loss: 0.11101416498422623, Validation Loss: 0.1185445487499237
#   Loss: 0.09824879467487335, Validation Loss: 0.10873130708932877
#   Loss: 0.0879305973649025, Validation Loss: 0.10114668309688568
#   Loss: 0.07938310503959656, Validation Loss: 0.09510662406682968

# The returned value is computed from the first two arguments
# (x_train, y_train).
accuracy(x_train, y_train, x_val, y_val, final_model)
# Output recorded from a previous run:
#   0.9807166666666667

# Show one validation image and print the model's prediction for it.
predict(final_model, val_data[0][22])
# Output recorded from a previous run:
#   Prediction: 6