Data Science Asked by Iain MacCormick on March 11, 2021
The following example is a simplified version of what I'm working on. I'm trying to find a neural network which minimises the cosine distance. The reason I have implemented my own cosine-difference loss function, rather than using TensorFlow's built-in method, is that in the full version of my project the built-in method does not quite meet my requirements (although in this simplified version the two are equivalent).
I input two orthogonal vectors (A and B) into the network and try to reduce the cosine distance between A and B. The network does this by minimising a loss function which also includes a component that preserves the length of vector B as it is being transformed. Ultimately the output should be a vector with the same direction as vector A and the length of vector B.
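For a concrete sanity check, here is a minimal NumPy sketch (separate from the network code below) that computes the target this setup should converge to for the vectors used later (A = [1, 0], B = [0, 2]), together with the cosine distance between the orthogonal inputs:

```python
import numpy as np

A = np.array([1.0, 0.0])
B = np.array([0.0, 2.0])

# target: same direction as A, same length as B
target = A / np.linalg.norm(A) * np.linalg.norm(B)
print(target)  # [2. 0.]

# cosine distance between the orthogonal inputs: should start at 1
cos_dist = 1 - A.dot(B) / (np.linalg.norm(A) * np.linalg.norm(B))
print(cos_dist)  # 1.0
```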
The problem I am having is that the network output "vector_B_transformed" never changes, and the loss function I have built is also constant throughout training. I've tried initialising the weights differently, but that hasn't helped. I never put a ReLU on the final layer of my fully connected network, and I have experimented with ReLU activations on the hidden layers, but this doesn't seem to make a difference.
I am crudely appending the results to a list and printing them to the terminal. I reduced the number of epochs to 200, but the same problem occurs when I increase it.
If anyone can help me it would be greatly appreciated, as I’m really stuck.
```python
import math
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops
# from utils import *

##### New Helper Functions

# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)


def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)


def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer


## loss functions
def cosine_distance_simple(A, B):
    # L2-normalise each row, take the diagonal of the pairwise similarity
    # matrix and sum the per-row cosine distances
    normalize_A = tf.nn.l2_normalize(A, 1)
    normalize_B = tf.nn.l2_normalize(B, 1)
    distance_matrix = 1 - tf.matmul(normalize_A, normalize_B, transpose_b=True)
    distance_matrix = tf.diag_part(distance_matrix)
    distance = tf.reduce_sum(distance_matrix)
    return distance


def maintain_length(A, B):
    return (tf.norm(A) - tf.norm(B))
```
```python
from __future__ import division
import tensorflow as tf


# generator network without residual block
def generator(vector, reuse=False, name="generator"):
    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False
        output_dimension = vector.shape[1]
        e1 = fc_layer(vector, 2, name='g_e1', use_relu=False)
        e2 = fc_layer(e1, 4, name='g_e2', use_relu=False)
        e3 = fc_layer(e2, 8, name='g_e3', use_relu=False)
        e4 = fc_layer(e3, 16, name='g_e4', use_relu=False)
        e5 = fc_layer(e4, 16, name='g_e5', use_relu=False)
        e6 = fc_layer(e5, 8, name='g_e6', use_relu=False)
        e7 = fc_layer(e6, 4, name='g_e7', use_relu=False)
        e8 = fc_layer(e7, output_dimension, name='g_e8', use_relu=False)
        return e8
```
```python
from __future__ import division
import os
import time
from glob import glob
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from collections import namedtuple
from sklearn.model_selection import train_test_split
# from module import *
# from utils import *


class cosine_diff_test(object):

    def __init__(self, sess, args):
        # initialise tensorflow session
        self.sess = sess
        # data, test, train splits
        self.data_A = args.vA
        self.data_B = args.vB
        self.generator = generator
        # when an instance of this class is created, _build_model is automatically called
        self._build_model()

    def _build_model(self):
        #### INPUTS TO NETWORKS
        # placeholders for vectors
        self.vector_A = tf.placeholder(tf.float32,
                                       [None, 2],
                                       name='vector_A')
        self.vector_B = tf.placeholder(tf.float32,
                                       [None, 2],
                                       name='vector_B')
        # FCNN to determine vector move required
        self.vector_B_ = self.generator(self.vector_B, False, name="generatorB")
        # minimise cos_dist between A and B_ while keeping the length of B
        self.loss = (cosine_distance_simple(self.vector_A, self.vector_B_)
                     + maintain_length(self.vector_B, self.vector_B_))
        '''
        self.loss = (abs_criterion(self.vector_A, self.vector_A_)
                     + abs_criterion(self.vector_B, self.vector_B_))
        '''
        # trainable variables
        t_vars = tf.trainable_variables()
        # trainable variables for the generator
        self.g_vars = [var for var in t_vars if 'generator' in var.name]

    def train(self, args):
        # placeholder for learning rate
        self.lr = tf.placeholder(tf.float32, None, name='learning_rate')
        # define optimizer
        self.optim = tf.train.AdamOptimizer(self.lr, beta1=args.beta1).minimize(self.loss, var_list=self.g_vars)
        # initialise global variables and run session
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
        lr = args.lr
        # Import Data
        vecA = self.data_A.copy()
        vecB = self.data_B.copy()
        results_loss = []
        results_vector_B_transformed = []
        # iterate over the number of epochs defined
        for epoch in range(args.epoch):
            # Update
            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA,
                           self.vector_B: vecB,
                           self.lr: lr})
            results_loss.append(_)
            results_vector_B_transformed.append(vector_B_transformed)
        print(results_loss)
        print(results_vector_B_transformed)
        origin = args.orig
        print('plotting ...')
        plt.xlim((-0.5, 1.5))
        plt.ylim((-0.5, 2.5))
        plt.quiver(*origin, vecA, vector_B_transformed,
                   color=['r', 'b'], angles='xy', scale_units='xy', scale=1)


class Args():
    A_vec = np.array([1, 0]).reshape(1, -1)
    B_vec = np.array([0, 2]).reshape(1, -1)
    ori = np.array([0, 0]).reshape(1, -1)
    epoch = 200
    lr = 0.0002
    vA = A_vec
    vB = B_vec
    beta1 = 0.5
    orig = ori


args = Args()

# TRAIN
tf.reset_default_graph()
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
with tf.Session(config=tfconfig) as sess:
    model = cosine_diff_test(sess, args)
    model.train(args)
```
Ok so I found the problem, a simple error in the end: my optimiser wasn't included in my update step.
```python
vector_B_transformed, _ = self.sess.run(
    [self.vector_B_, self.loss],
    feed_dict={self.vector_A: vecA,
               self.vector_B: vecB,
               self.lr: lr})
```
It should run the optimiser as well:

```python
# Update
vector_B_transformed, _, loss = self.sess.run(
    [self.vector_B_, self.optim, self.loss],
    feed_dict={self.vector_A: vecA,
               self.vector_B: vecB,
               self.lr: lr})
```
My code still isn't quite working as expected, but it is at least trying to optimise something, so that's progress!
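One thing that may be worth checking, as a possible reason the result still looks off: maintain_length returns the signed difference tf.norm(A) - tf.norm(B), so the optimiser can reduce the total loss simply by making the transformed vector longer. If the intent is only to penalise any change in length, a small sketch of a non-negative version would be (an absolute difference is used here; a squared difference would also work):

```python
def maintain_length(A, B):
    # penalise a change in length in either direction
    return tf.abs(tf.norm(A) - tf.norm(B))
```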
Answered by Iain MacCormick on March 11, 2021