
CNN not learning: TensorFlow implementation of a binary image classification problem

Data Science Stack Exchange, asked by Osama Khafagy on September 8, 2020

I designed a custom network architecture (a 16-layer net) for a binary image classification task; here are some highlights:

  • input images: 100x100x3, two classes (car and person), normalized and shuffled
  • output layer: sigmoid activation
  • loss layer: tf.nn.sigmoid_cross_entropy_with_logits() (see the note right after this list)
  • learning rate: 1e-4 (tested Adam/GD optimizers)
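
For reference, tf.nn.sigmoid_cross_entropy_with_logits() applies the sigmoid internally and therefore expects raw, pre-activation logits. A minimal sketch of the intended pairing (illustrative only, separate from the code below):

import tensorflow as tf

# raw scores from the last layer, with *no* sigmoid applied
logits = tf.placeholder(tf.float32, shape=[None, 1])
labels = tf.placeholder(tf.float32, shape=[None, 1])
# the op computes sigmoid(logits) internally before the cross-entropy
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
# probabilities for prediction are derived separately
probs = tf.sigmoid(logits)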

data samples (images omitted): Car (label 0), Person (label 1)

The problem:

  • the cost oscillates around 0.7 (roughly ln 2 ≈ 0.693, the sigmoid cross-entropy of a constant 0.5 prediction on balanced classes)
  • the prediction is the same value for all test images (e.g., 0.13254)

data loading code

import glob
import numpy as np
import tensorflow as tf  # TF 1.x API (placeholders, tf.contrib, tf.layers)
from PIL import Image

def load_image_list(train=True):
    train_data_folder_prefix = 'natural_images/reshaped_images/train/'
    test_data_folder_prefix = 'natural_images/reshaped_images/test/'
    if train:
        data_addresses = glob.glob(train_data_folder_prefix + '*.jpg')
    else:
        data_addresses = glob.glob(test_data_folder_prefix + '*.jpg')
    return data_addresses

def randomize_data(data_addresses):
    return np.random.permutation(data_addresses)

def load_addr_batch(data_addresses, batch_size):
    # note: np.random.choice samples with replacement by default
    batch_addr = np.random.choice(data_addresses, size=batch_size)
    return batch_addr

def load_image_batch(data_addresses):
    # debug: print([np.array(Image.open(fname)).shape for fname in data_addresses])
    batch = np.array([np.array(Image.open(fname)) for fname in data_addresses], dtype=np.float32)
    # normalization (currently commented out)
    # batch /= 255.0
    return batch

def create_labels(data_addresses, bin_encoding=False):
    digits = '0123456789_'
    # strip trailing digits/underscores from the file stem to get the class name
    classes = [address.split('/')[-1].split('.')[0].strip(digits) for address in data_addresses]
    # print(classes)  # for debugging
    labels = []  # only filled when bin_encoding=True; otherwise use one_hot_encode(classes)
    if bin_encoding:
        labels = [0 if 'car' in cl else 1 for cl in classes]
    return np.array(labels, dtype=np.float32)[:, np.newaxis], classes
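
For example, with the filename scheme implied above (an assumption on my part, e.g. 'car_0001.jpg' / 'person_0042.jpg'), stripping digits and underscores yields the class string and the binary label:

# illustrative call; the paths are hypothetical
labels, classes = create_labels(['train/car_0001.jpg', 'train/person_0042.jpg'], bin_encoding=True)
print(classes)  # ['car', 'person']
print(labels)   # column vector of float labels: car -> 0.0, person -> 1.0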

def one_hot_encode(data_classes):
    m = len(data_classes)
    one_hot_encoded_data = np.zeros((m, 2))
    for i in range(m):
        if data_classes[i] == 'car':
            one_hot_encoded_data[i] = [1, 0]
        else:
            one_hot_encoded_data[i] = [0, 1]
    return np.array(one_hot_encoded_data)

def load_random_train_batch(batch_size=32):
    # load image addresses
    data_addresses = load_image_list()
    # randomize them
    rand_addresses = randomize_data(data_addresses)
    # load 'batch_size' images
    batch_addresses = load_addr_batch(rand_addresses, batch_size)
    # load images
    X_data = load_image_batch(batch_addresses)
    # create labels
    Y_labels, classes  = create_labels(batch_addresses, bin_encoding=True)
    Y_ohe = one_hot_encode(classes)
    return X_data, Y_labels, Y_ohe

def load_random_test_batch(n_images=1):
    data_addresses = load_image_list(train=False)
    batch_addresses = load_addr_batch(data_addresses, n_images)
    # load images
    X_data = load_image_batch(batch_addresses)
    # create labels
    Y_labels, classes = create_labels(batch_addresses, bin_encoding=True)
    Y_ohe = one_hot_encode(classes)
    return X_data, Y_labels, Y_ohe

def load_train_batch(start, end):
    # load image addresses (note: re-shuffled on every call, so start:end
    # indexes a different permutation each time)
    data_addresses = randomize_data(load_image_list())
    # load 'batch_size' images
    batch_addresses = data_addresses[start:end]
    # load images
    X_data = load_image_batch(batch_addresses)
    # create labels
    Y_labels, classes  = create_labels(batch_addresses, bin_encoding=True)
    Y_ohe = one_hot_encode(classes)
    return X_data, Y_labels, Y_ohe
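
A quick shape sanity check for the loaders above (a sketch, assuming the folder layout described earlier):

X_data, Y_labels, Y_ohe = load_random_train_batch(batch_size=8)
print(X_data.shape)   # expected: (8, 100, 100, 3)
print(Y_labels.shape) # expected: (8, 1)
print(Y_ohe.shape)    # expected: (8, 2)
print(X_data.min(), X_data.max())  # should lie in [0, 1] if normalization is active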

model code

def model(X):
    """
    X: tensor of shape (m, 100, 100, 3)
    returns: tensor of shape (m, 1)
    """
    in_channels = X.get_shape().as_list()[-1]
    # conv1: 5x5, 3 -> 16 channels
    W1 = weight_variable([5, 5, in_channels, 16], 'W1')
    b1 = bias_variable([1, 1, 1, 16], 'b1')
    Z1 = tf.nn.conv2d(X, W1, strides=1, padding='VALID')
    A1 = tf.nn.relu(Z1 + b1)
    # pool1 + batch norm
    P1 = tf.nn.max_pool(A1, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    P1 = tf.layers.batch_normalization(P1)
    # conv2: 3x3, 16 -> 32 channels
    W2 = weight_variable([3, 3, 16, 32], 'W2')
    b2 = bias_variable([1, 1, 1, 32], 'b2')
    Z2 = tf.nn.conv2d(P1, W2, strides=1, padding='VALID')
    A2 = tf.nn.relu(Z2 + b2)
    # pool2 + batch norm
    P2 = tf.nn.max_pool(A2, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    P2 = tf.layers.batch_normalization(P2)
    # conv3: 2x2, 32 -> 64 channels
    W3 = weight_variable([2, 2, 32, 64], 'W3')
    b3 = bias_variable([1, 1, 1, 64], 'b3')
    Z3 = tf.nn.conv2d(P2, W3, strides=1, padding='VALID')
    A3 = tf.nn.relu(Z3 + b3)
    # pool3 + batch norm
    P3 = tf.nn.max_pool(A3, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    P3 = tf.layers.batch_normalization(P3)
    # conv4: 2x2, 64 -> 128 channels
    W4 = weight_variable([2, 2, 64, 128], 'W4')
    b4 = bias_variable([1, 1, 1, 128], 'b4')
    Z4 = tf.nn.conv2d(P3, W4, strides=1, padding='VALID')
    A4 = tf.nn.relu(Z4 + b4)
    # pool4
    P4 = tf.nn.max_pool(A4, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    # conv5: 2x2, 128 -> 256 channels
    W5 = weight_variable([2, 2, 128, 256], 'W5')
    b5 = bias_variable([1, 1, 1, 256], 'b5')
    Z5 = tf.nn.conv2d(P4, W5, strides=1, padding='VALID')
    A5 = tf.nn.relu(Z5 + b5)
    # pool5
    P5 = tf.nn.max_pool(A5, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    # conv6: 1x1, 256 -> 512 channels
    W6 = weight_variable([1, 1, 256, 512], 'W6')
    b6 = bias_variable([1, 1, 1, 512], 'b6')
    Z6 = tf.nn.conv2d(P5, W6, strides=1, padding='VALID')
    A6 = tf.nn.relu(Z6 + b6)
    # pool6
    P6 = tf.nn.max_pool(A6, ksize=[1, 2, 2, 1], strides=2, padding='VALID')
    # conv7: 1x1, 512 -> 1024 channels (spatial size is 1x1 at this point)
    W7 = weight_variable([1, 1, 512, 1024], 'W7')
    b7 = bias_variable([1, 1, 1, 1024], 'b7')
    Z7 = tf.nn.conv2d(P6, W7, strides=1, padding='VALID')
    A7 = tf.nn.relu(Z7 + b7)
    # flatten
    Z8 = tf.reshape(A7, [-1, 1024])
    # fully connected layers
    Z9 = tf.contrib.layers.fully_connected(Z8, 512, activation_fn=tf.nn.relu)
    Z10 = tf.contrib.layers.fully_connected(Z9, 16, activation_fn=tf.nn.tanh)
    # output: sigmoid is applied here (note: the training loss below also applies one internally)
    Z11 = tf.contrib.layers.fully_connected(Z10, 1, activation_fn=tf.nn.sigmoid)
    return Z11
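
The helpers weight_variable and bias_variable are not shown in the question; a typical TF1-style definition (an assumption, not the asker's code) would be:

def weight_variable(shape, name):
    # truncated-normal initialization, a common TF1 choice for conv weights
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name):
    # small constant bias
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)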

training code

m = 1500
epochs = 100
BATCH_SIZE = 128
epsilon = 1e-6

# create input/output placeholder
# data is being loaded online
Xs = tf.placeholder(dtype=tf.float32, shape=[None, 100, 100, 3])
Y_true = tf.placeholder(dtype=tf.float32, shape=[None, 1])
# create model
Y_pred = model(Xs)
# define loss
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Y_pred, labels=Y_true))
# define optimizer
train_step = tf.train.GradientDescentOptimizer(learning_rate=1e-4).minimize(cost)
# init all variables
init = tf.global_variables_initializer()

# start session
with tf.Session() as sess:
    sess.run(init)
    loss_summary = []
    current_loss = 100
    n_chunks = int(m / BATCH_SIZE)
    for epoch in range(epochs):
        print('*'*50)
        print('Epoch {}'.format(epoch+1))
        for i in range(n_chunks):
            # grab a mini-batch
            start = i*BATCH_SIZE
            end = (i+1)*BATCH_SIZE
            print('-'*15)
            print('iteration {}'.format(i+1))
            X_data, Y_labels, _ = load_train_batch(start, end)
            print(X_data.shape)
            sess.run(train_step, feed_dict={Xs: X_data, Y_true: Y_labels})
            pred, current_loss = sess.run([Y_pred, cost], feed_dict={Xs: X_data, Y_true: Y_labels})
            loss_summary.append(current_loss)
            """print('Y_true: ', Y_labels)
            print('Y_pred: ', pred)"""
            print('cost', current_loss)
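
The per-image test predictions mentioned above are not shown in the post; inside the same session they could be produced along these lines (a sketch, reusing the loaders defined earlier):

    # evaluate on a random test batch, still inside the session
    X_test, Y_test, _ = load_random_test_batch(n_images=32)
    test_pred, test_loss = sess.run([Y_pred, cost], feed_dict={Xs: X_test, Y_true: Y_test})
    print('test cost', test_loss)
    print('test predictions', test_pred.ravel())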
