import tensorflow as tf
import numpy as np
from pylab import imshow
import random
import cv2
import matplotlib.pyplot as plt

# Each split is an (images, labels) pair as returned by the Keras MNIST loader.
train_data, val_data = tf.keras.datasets.mnist.load_data()

# Flatten every image into one row and divide by 255 to rescale the pixels.
x_train = tf.reshape(train_data[0], (train_data[0].shape[0], -1)) / 255
x_val = tf.reshape(val_data[0], (val_data[0].shape[0], -1)) / 255

# Problem dimensions taken from the data.
n = x_train.shape[0]  # number of training samples
q = x_train.shape[1]  # features per sample (flattened pixels)
d = len(np.unique(train_data[1]))  # number of distinct labels

# Hyperparameters read by train().
h = 256  # hidden units
lambda_ = 0.001  # L2 penalty coefficient
sigma = 0.01  # standard deviation of the random initial weights
learning_rate = 0.00001  # gradient-descent step size

# One-hot encode the integer labels so they line up with the softmax output.
y_train = tf.one_hot(train_data[1], depth=d)
y_val = tf.one_hot(val_data[1], depth=d)

def loss(Y_hat, Y):
    """Return the mean cross-entropy between predictions and targets.

    Args:
        Y_hat: Predicted probabilities, one row per sample.
        Y: Targets with the same shape as ``Y_hat`` (one-hot rows at the
            call sites in this file).

    Returns:
        ``-sum(Y * log(Y_hat))`` divided by the number of rows, as a Python
        float.
    """
    # A probability that underflows to exactly 0 gives log(0) = -inf, and
    # -inf * 0 is NaN, which would poison the whole sum even though that
    # entry carries no weight.  multiply_no_nan returns 0 wherever Y is 0,
    # and leaves every other product unchanged.
    loss_sums = tf.math.multiply_no_nan(tf.math.log(Y_hat), Y)
    return -float(tf.math.reduce_sum(loss_sums) / len(loss_sums))

def classify(X, W1, b1, W2, b2):
    """Run the forward pass of the one-hidden-layer network.

    Args:
        X: Input rows, one sample per row.
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.

    Returns:
        A 4-tuple ``(Y_hat, Z, H, O)``: the softmax probabilities, the hidden
        pre-activations, the ReLU hidden activations, and the output logits.
    """
    pre_activation = tf.linalg.matmul(X, W1) + b1
    hidden = tf.nn.relu(pre_activation)
    logits = tf.linalg.matmul(hidden, W2) + b2
    return tf.nn.softmax(logits), pre_activation, hidden, logits

def train(X, Y, val_X, val_Y, epochs=1000):
    """Fit the one-hidden-layer network with full-batch gradient descent.

    The layer sizes are derived from the data passed in (columns of ``X`` and
    ``Y``), so the function does not depend on the module-level ``q`` and
    ``d``.  The hidden width ``h``, the initialisation scale ``sigma``, the
    L2 coefficient ``lambda_`` and ``learning_rate`` are still read from
    module scope.

    Args:
        X: Training inputs, one sample per row.
        Y: Training targets, one row per sample, one column per class.
        val_X: Validation inputs, used only for the progress report.
        val_Y: Validation targets, used only for the progress report.
        epochs: Number of gradient-descent steps to take.

    Returns:
        The trained parameters as a tuple ``(W1, b1, W2, b2)``.
    """
    n_features = X.shape[1]
    n_classes = Y.shape[1]

    W1 = tf.random.normal((n_features, h), 0, sigma)
    b1 = tf.random.normal((h,), 0, sigma)
    W2 = tf.random.normal((h, n_classes), 0, sigma)
    b2 = tf.random.normal((n_classes,), 0, sigma)

    for epoch in range(epochs):
        Y_hat, Z, H, O = classify(X, W1, b1, W2, b2)

        # Report progress every 100 epochs and on the final epoch.
        if not epoch % 100 or epoch == epochs - 1:
            val_Y_hat, *_ = classify(val_X, W1, b1, W2, b2)
            print(f"Loss: {loss(Y_hat, Y)}, Validation Loss: {loss(val_Y_hat, val_Y)}")

        # Backward pass.  Gradients are summed over the batch, not averaged.
        dH_dZ = tf.cast(H != 0, tf.float32)  # ReLU mask: 1 where the unit fired
        dL_dO = Y_hat - Y  # softmax + cross-entropy gradient w.r.t. the logits
        dJ_dZ = tf.linalg.matmul(dL_dO, tf.transpose(W2)) * dH_dZ

        # Each gradient carries the L2 term lambda_ * parameter (biases too).
        dJ_dW2 = tf.linalg.matmul(tf.transpose(H), dL_dO) + (lambda_ * W2)
        dJ_db2 = tf.math.reduce_sum(dL_dO, axis=0) + (lambda_ * b2)
        dJ_dW1 = tf.linalg.matmul(tf.transpose(X), dJ_dZ) + (lambda_ * W1)
        dJ_db1 = tf.math.reduce_sum(dJ_dZ, axis=0) + (lambda_ * b1)

        W1 -= learning_rate * dJ_dW1
        b1 -= learning_rate * dJ_db1
        W2 -= learning_rate * dJ_dW2
        b2 -= learning_rate * dJ_db2
    return (W1, b1, W2, b2)

def accuracy(X, Y, val_X, val_Y, model):
    """Return the fraction of rows in ``X`` that the model labels correctly.

    Args:
        X: Inputs to evaluate, one sample per row.
        Y: Targets for ``X``, one row per sample, one column per class.
        val_X: Unused; kept so existing callers keep working.
        val_Y: Unused; kept so existing callers keep working.
        model: Tuple ``(W1, b1, W2, b2)`` as returned by ``train``.

    Returns:
        The share of rows whose highest-probability class matches the
        highest-valued column of ``Y``.
    """
    # The result has only ever depended on X and Y, so no forward pass is
    # run on the validation inputs.
    W1, b1, W2, b2 = model
    Y_hat, *_ = classify(X, W1, b1, W2, b2)
    Y_hat_label = tf.math.argmax(Y_hat, axis=1)
    Y_label = tf.math.argmax(Y, axis=1)
    # The mean of a boolean array is the fraction of True entries.
    return np.mean(np.array(Y_hat_label == Y_label))

def predict(model, img):
    """Display ``img`` and print the class the model assigns to it.

    Args:
        model: Tuple ``(W1, b1, W2, b2)`` as returned by ``train``.
        img: A single image; it is flattened and divided by 255 before being
            fed to the network.
    """
    W1, b1, W2, b2 = model

    # Build a one-row batch in the flattened, rescaled layout classify() gets
    # during training.
    flat_pixels = tf.reshape(img, (-1,)) / 255
    batch = np.expand_dims(flat_pixels, 0)

    probabilities, *_ = classify(batch, W1, b1, W2, b2)
    prediction = tf.math.argmax(probabilities[0])

    imshow(img)
    plt.show()
    print(f"Prediction: {prediction}")

# Fit the network on the training split; the validation split is only used
# for the loss report printed during training.
final_model = train(
    X=x_train,
    Y=y_train,
    val_X=x_val,
    val_Y=y_val,
    epochs=1000,
)

Loss: 2.3020310401916504, Validation Loss: 2.3020031452178955
Loss: 0.3015304505825043, Validation Loss: 0.2890055179595947
Loss: 0.22133658826351166, Validation Loss: 0.21477515995502472
Loss: 0.17719019949436188, Validation Loss: 0.17488066852092743
Loss: 0.14817407727241516, Validation Loss: 0.1492069959640503
Loss: 0.12714974582195282, Validation Loss: 0.13153091073036194
Loss: 0.11101416498422623, Validation Loss: 0.1185445487499237
Loss: 0.09824879467487335, Validation Loss: 0.10873130708932877
Loss: 0.0879305973649025, Validation Loss: 0.10114668309688568
Loss: 0.07938310503959656, Validation Loss: 0.09510662406682968

# Evaluate the fitted model.
accuracy(X=x_train, Y=y_train, val_X=x_val, val_Y=y_val, model=final_model)

0.9807166666666667

# Show one validation image together with the model's predicted digit.
predict(model=final_model, img=val_data[0][22])

Prediction: 6

Multilayer Neural Network Implementation From Scratch: Handwritten Text Parser

Gino Prasad

10/03/2022

This notebook builds a multilayer neural network with one hidden layer of h hidden units.