Data Science Asked by Iain MacCormick on March 11, 2021
The following example is a simplified version of what I'm working on. I'm trying to find a neural network that minimises the cosine distance. The reason I have implemented my own cosine-difference loss function, rather than using TensorFlow's built-in method, is that in the full version of my project the built-in one does not quite meet my requirements (although in this simple version they are equivalent).
I input two orthogonal vectors (A and B) into the network and try to reduce the cosine distance between them. The network does this by minimising a loss function which also includes a component that preserves the length of vector B as it is transformed. Ultimately my output should be a vector that has the same direction as vector A and the length of vector B.
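To make the target concrete, here is a minimal NumPy sanity check (using the same example vectors that appear further down, A = [1, 0] and B = [0, 2]) of what the transformed vector should converge towards:

```
import numpy as np

A = np.array([1.0, 0.0])
B = np.array([0.0, 2.0])

# desired output: the direction of A, rescaled to the length of B
target = (A / np.linalg.norm(A)) * np.linalg.norm(B)
print(target)  # [2. 0.]
```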
The problem I am having is that the network output, vector_B_transformed, never changes, and the loss function I have built is constant throughout training. I've tried initialising the weights differently, but that hasn't helped. I never put a ReLU on the final layer of my fully connected network, and I have experimented with ReLU activations on the hidden layers, but this doesn't seem to make a difference.
I am crudely appending the results to a list and printing them to the terminal. I reduced the epochs to 200, but the same problem occurs when I increase them.
If anyone can help me it would be greatly appreciated, as I’m really stuck.
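For a single pair of vectors, the loss the code below builds amounts to (with $B'$ denoting the transformed vector):

$$\mathcal{L} = \Big(1 - \frac{A \cdot B'}{\lVert A \rVert \, \lVert B' \rVert}\Big) + \big(\lVert B \rVert - \lVert B' \rVert\big)$$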
```
from __future__ import division
import math
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops
# from utils import *
##### New Helper Functions
# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)
def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)
def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: the output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
## loss function
def cosine_distance_simple(A, B):
    # Row-wise cosine distance: normalise each row, build the similarity
    # matrix, keep its diagonal (matching row pairs) and sum the distances.
    normalize_A = tf.nn.l2_normalize(A, 1)
    normalize_B = tf.nn.l2_normalize(B, 1)
    distance_matrix = 1 - tf.matmul(normalize_A, normalize_B, transpose_b=True)
    distance_matrix = tf.diag_part(distance_matrix)
    distance = tf.reduce_sum(distance_matrix)
    return distance
def maintain_length(A, B):
    # Penalise any difference in length between the two vectors
    # (note: this term can go negative, it is not an absolute difference)
    return (tf.norm(A) - tf.norm(B))
# generator network without residual block
def generator(vector, reuse=False, name="generator"):
    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False
        output_dimension = vector.shape[1]
        e1 = fc_layer(vector, 2, name='g_e1', use_relu=False)
        e2 = fc_layer(e1, 4, name='g_e2', use_relu=False)
        e3 = fc_layer(e2, 8, name='g_e3', use_relu=False)
        e4 = fc_layer(e3, 16, name='g_e4', use_relu=False)
        e5 = fc_layer(e4, 16, name='g_e5', use_relu=False)
        e6 = fc_layer(e5, 8, name='g_e6', use_relu=False)
        e7 = fc_layer(e6, 4, name='g_e7', use_relu=False)
        e8 = fc_layer(e7, output_dimension, name='g_e8', use_relu=False)
        return e8
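# Note: with use_relu=False on every layer, the generator above collapses to a
# purely linear (affine) map of its input, however many layers are stacked.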
import os
import time
from glob import glob
from collections import namedtuple
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# from module import *
# from utils import *
class cosine_diff_test(object):
    def __init__(self, sess, args):
        # initialise tensorflow session
        self.sess = sess
        # data, test, train splits
        self.data_A = args.vA
        self.data_B = args.vB
        self.generator = generator
        # when an instance of this class is created, _build_model is automatically called
        self._build_model()

    def _build_model(self):
        #### INPUTS TO NETWORKS
        # placeholders for vectors
        self.vector_A = tf.placeholder(tf.float32,
                                       [None, 2],
                                       name='vector_A')
        self.vector_B = tf.placeholder(tf.float32,
                                       [None, 2],
                                       name='vector_B')
        # FCNN to determine the vector move required
        self.vector_B_ = self.generator(self.vector_B, False, name="generatorB")
        # minimise cos_dist between A and B_ while keeping the length of B
        # (the parentheses are needed so the maintain_length term is actually
        # part of the loss rather than a discarded expression)
        self.loss = (cosine_distance_simple(self.vector_A, self.vector_B_)
                     + maintain_length(self.vector_B, self.vector_B_))
        '''
        self.loss = abs_criterion(self.vector_A, self.vector_A_)
                    + abs_criterion(self.vector_B, self.vector_B_)
        '''
        # trainable variables
        t_vars = tf.trainable_variables()
        # trainable variables for the generator
        self.g_vars = [var for var in t_vars if 'generator' in var.name]
    def train(self, args):
        # placeholder for learning rate
        self.lr = tf.placeholder(tf.float32, None, name='learning_rate')
        # define optimizer
        self.optim = tf.train.AdamOptimizer(self.lr, beta1=args.beta1).minimize(self.loss, var_list=self.g_vars)
        # initialise global variables and run session
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
        lr = args.lr

        # Import Data
        vecA = self.data_A.copy()
        vecB = self.data_B.copy()

        results_loss = []
        results_vector_B_transformed = []
        # iterate over the number of epochs defined
        for epoch in range(args.epoch):
            # Update
            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA,
                           self.vector_B: vecB,
                           self.lr: lr})
            results_loss.append(_)
            results_vector_B_transformed.append(vector_B_transformed)

        print(results_loss)
        print(results_vector_B_transformed)

        origin = args.orig
        print('plotting ...')
        plt.xlim((-0.5, 1.5))
        plt.ylim((-0.5, 2.5))
        plt.quiver(*origin, vecA, vector_B_transformed,
                   color=['r', 'b'], angles='xy', scale_units='xy', scale=1)
class Args():
    A_vec = np.array([1, 0]).reshape(1, -1)
    B_vec = np.array([0, 2]).reshape(1, -1)
    ori = np.array([0, 0]).reshape(1, -1)
    epoch = 200
    lr = 0.0002
    vA = A_vec
    vB = B_vec
    beta1 = 0.5
    orig = ori

args = Args()
# TRAIN
tf.reset_default_graph()
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
with tf.Session(config=tfconfig) as sess:
    model = cosine_diff_test(sess, args)
    model.train(args)
```
OK, so I found the problem; it was a simple error in the end: my optimiser wasn't included in my update step.
Before (only the output and loss tensors are evaluated, so the optimiser never runs and the weights are never updated):

```
vector_B_transformed, _ = self.sess.run(
    [self.vector_B_, self.loss],
    feed_dict={self.vector_A: vecA,
               self.vector_B: vecB,
               self.lr: lr})
```

After (self.optim is included in the fetches, so each call to sess.run applies a gradient step):

```
# Update
vector_B_transformed, _, loss = self.sess.run(
    [self.vector_B_, self.optim, self.loss],
    feed_dict={self.vector_A: vecA,
               self.vector_B: vecB,
               self.lr: lr})
```
My code still isn't quite working as expected, but it is at least trying to optimise something, so that's progress!
Answered by Iain MacCormick on March 11, 2021