Implementation of RMS prop for linear regression

Question

I'm trying to implement linear regression using Rms Prop optimizer from scratch.
Code:
EPOCHS = 100

w3 = tf.Variable(w_vector, dtype = tf.float32)
w4 = tf.Variable(0, dtype = tf.float32)
lr = 1e-5
beta = 0.9
epilson = 1e-7
momentum = 0.0
for epoch in range(1,EPOCHS+1):
    mom_w = 0
    mom_b = 0
    mean_square_w = 0
    mean_gradient_w = 0
    mean_square_b = 0
    mean_gradient_b = 0

y_pred1 = tf.squeeze(tf.matmul(w3,x, transpose_a = True, transpose_b = True) + w4)
    dw3, dw4 = gradients_mse(x, y, y_pred1)

# My eqautions for RMS prop
    mean_square_w = beta * mean_square_w + (1-beta) * dw3 ** 2
    mean_gradient_w = beta * mean_gradient_w + (1-beta) * dw3
    
    mom_w = momentum * mom_w + lr * (dw3/(tf.sqrt(mean_square_w + epilson - mean_gradient_w ** 2)))
    
    mean_square_b = beta * mean_square_b + (1-beta) * dw4 ** 2
    mean_gradient_b = beta * mean_gradient_b + (1-beta) * dw4
    
    mom_b = momentum * mom_b + lr * (dw4/(tf.sqrt(mean_square_b + epilson - mean_gradient_b ** 2)))

w3.assign_sub(mom_w)
    w4.assign_sub(mom_b)

print('w3 : {}'.format(w3.numpy()))
print('w4 : {}'.format(w4.numpy()))

Output:
w3 : [[-1.2507935]]
w4 : 0.0033333366736769676

Now I create a single layer and single neuron neural network with no activation function. Assign the same weights in its neuron and use RMS prop as optimizer I get different final weights. However, this was not the case for sgd optimizer.
Code:
# using keras to get same results
def create_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units = 1,  name = 'd1', input_shape = (x.shape[1],)))
    
    model.compile(optimizer=tf.keras.optimizers.RMSprop(
    learning_rate=1e-5, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False),
                loss="mse")
    
    return model
model = create_model()

d1 = model.get_layer('d1')
d1_weights = [tf.constant(w_vector, dtype = tf.float32), tf.constant(np.array([0]), dtype = tf.float32)]
d1.set_weights(d1_weights)

model.fit(x, y, epochs = 100)

d1 = model.get_layer('d1')
print('w3 = {}'.format(d1.weights[0].numpy()))
print('w4 = {}'.format(d1.weights[1].numpy()[0]))

Output:
w3 = [[-1.2530397]]
w4 = 0.0010913893347606063

My gradients are calculate correctly for mse loss function. I have crosschecked them with tensorflows inbuilt gradient computation function gradient tape.
Code:
# Computing gradients
def gradients_mse(X, Y, Y_PREDS):
    DW1 = tf.matmul(X, tf.reshape(Y-Y_PREDS, (X.shape[0],1)), transpose_a = True) * (-2/X.shape[0])
    DW0 = (-2 / X.shape[0]) * tf.reduce_sum(Y - Y_PREDS)
    return DW1, DW0

The only thing that can go wrong in this implementation is I think calculation of mom_w and mom_b using incorrect equations.
x.shape = [10,1]
The default batch size is 32 so it will have no effects on weight updates. The same code gives perfectly matching output when I try to use simple gradient descent instead of RMS prop.

Implementation of RMS prop for linear regression

Add your own answers!

Ask a Question