How to do it...

We proceed with the recipe as follows:

  1. The first step is importing all the necessary modules:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
%matplotlib inline
  2. Load the dataset:
mnist = input_data.read_data_sets("MNIST_data/")
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
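The tensorflow.examples.tutorials.mnist module is only bundled with older TensorFlow 1.x releases; if it is missing from your installation, an equivalent load via tf.keras (an alternative, not part of the original recipe) could look like this:
# Load MNIST through tf.keras and flatten the images to 784-dimensional vectors in [0, 1]
(trX, trY), (teX, teY) = tf.keras.datasets.mnist.load_data()
trX = trX.reshape(-1, 784).astype(np.float32) / 255.0
teX = teX.reshape(-1, 784).astype(np.float32) / 255.0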
  3. Next, we define the StackedAutoEncoder class. Its __init__ method takes a list with the number of neurons in each layer, starting from the input dimension, together with the learning rate. Since each layer has different input and output dimensions, we use dictionaries to hold the weights, biases, and input placeholders of each layer:
class StackedAutoEncoder(object):
    def __init__(self, list1, eta = 0.02):
        """
        list1: [input_dimension, hidden_layer_1, ...., hidden_layer_n]
        """
        N = len(list1) - 1
        self._m = list1[0]
        self.learning_rate = eta

        # Create the computational graph
        self._W = {}
        self._b = {}
        self._X = {}
        self._X['0'] = tf.placeholder('float', [None, list1[0]])

        for i in range(N):
            layer = '{0}'.format(i + 1)
            print('AutoEncoder Layer {0}: {1} --> {2}'.format(layer, list1[i], list1[i+1]))
            self._W['E' + layer] = tf.Variable(tf.random_normal(shape=(list1[i], list1[i+1])), name='WtsEncoder' + layer)
            self._b['E' + layer] = tf.Variable(np.zeros(list1[i+1]).astype(np.float32), name='BiasEncoder' + layer)
            self._X[layer] = tf.placeholder('float', [None, list1[i+1]])
            self._W['D' + layer] = tf.transpose(self._W['E' + layer])  # Shared weights
            self._b['D' + layer] = tf.Variable(np.zeros(list1[i]).astype(np.float32), name='BiasDecoder' + layer)

        # Placeholder for inputs
        self._X_noisy = tf.placeholder('float', [None, self._m])
  4. Still inside __init__, we build the computation graph that defines the optimization parameters for each autoencoder during pretraining. This involves defining the reconstruction loss of each autoencoder, taking the output of the previous autoencoder's encoder as its input. For this, we use the class methods pretrain and one_pass, which return the training op and the encoder output, respectively, for each stacked autoencoder:
        self.train_ops = {}
        self.out = {}

        for i in range(N):
            layer = '{0}'.format(i + 1)
            prev_layer = '{0}'.format(i)
            opt = self.pretrain(self._X[prev_layer], layer)
            self.train_ops[layer] = opt
            self.out[layer] = self.one_pass(self._X[prev_layer], self._W['E' + layer], self._b['E' + layer], self._b['D' + layer])
  5. We complete the computation graph with the ops needed to fine-tune the whole stacked autoencoder, making use of the class methods encoder and decoder:
        self.y = self.encoder(self._X_noisy, N)  # Encoder output
        self.r = self.decoder(self.y, N)  # Decoder output

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        error = self._X['0'] - self.r  # Reconstruction error

        self._loss = tf.reduce_mean(tf.pow(error, 2))
        self._opt = optimizer.minimize(self._loss)
  6. Finally, we define the class method fit, which performs batch-wise pretraining of each autoencoder, followed by fine-tuning of the whole stack. During pretraining we use the non-corrupted input, while for fine-tuning we use the corrupted input; this lets the stacked autoencoder reconstruct images even from noisy input:
    def fit(self, Xtrain, Xtr_noisy, layers, epochs = 1, batch_size = 100):
        N, D = Xtrain.shape
        num_batches = N // batch_size
        X_noisy = {}
        X = {}
        X_noisy['0'] = Xtr_noisy
        X['0'] = Xtrain

        # Layer-wise pretraining on the clean input
        for i in range(layers):
            Xin = X[str(i)]
            print('Pretraining Layer ', i + 1)
            for e in range(5):
                for j in range(num_batches):
                    batch = Xin[j * batch_size: (j * batch_size + batch_size)]
                    self.session.run(self.train_ops[str(i+1)], feed_dict = {self._X[str(i)]: batch})
            print('Pretraining Finished')
            X[str(i+1)] = self.session.run(self.out[str(i+1)], feed_dict = {self._X[str(i)]: Xin})

        # Fine-tuning of the full stacked autoencoder on the noisy input
        obj = []
        for i in range(epochs):
            for j in range(num_batches):
                batch = Xtrain[j * batch_size: (j * batch_size + batch_size)]
                batch_noisy = Xtr_noisy[j * batch_size: (j * batch_size + batch_size)]
                _, ob = self.session.run([self._opt, self._loss], feed_dict = {self._X['0']: batch, self._X_noisy: batch_noisy})
                if j % 100 == 0:
                    print('training epoch {0} batch {2} cost {1}'.format(i, ob, j))
            obj.append(ob)
        return obj
  7. The different class methods are as follows:
    def encoder(self, X, N):
        x = X
        for i in range(N):
            layer = '{0}'.format(i + 1)
            hiddenE = tf.nn.sigmoid(tf.matmul(x, self._W['E' + layer]) + self._b['E' + layer])
            x = hiddenE
        return x

    def decoder(self, X, N):
        x = X
        for i in range(N, 0, -1):
            layer = '{0}'.format(i)
            hiddenD = tf.nn.sigmoid(tf.matmul(x, self._W['D' + layer]) + self._b['D' + layer])
            x = hiddenD
        return x

    def set_session(self, session):
        self.session = session

    def reconstruct(self, x, n_layers):
        h = self.encoder(x, n_layers)
        r = self.decoder(h, n_layers)
        return self.session.run(r, feed_dict={self._X['0']: x})

    def pretrain(self, X, layer):
        y = tf.nn.sigmoid(tf.matmul(X, self._W['E' + layer]) + self._b['E' + layer])
        r = tf.nn.sigmoid(tf.matmul(y, self._W['D' + layer]) + self._b['D' + layer])

        # Objective Function
        error = X - r  # Reconstruction Error
        loss = tf.reduce_mean(tf.pow(error, 2))
        opt = tf.train.AdamOptimizer(.001).minimize(loss,
                var_list=[self._W['E' + layer], self._b['E' + layer], self._b['D' + layer]])
        return opt

    def one_pass(self, X, W, b, c):
        h = tf.nn.sigmoid(tf.matmul(X, W) + b)
        return h
  8. We use the corruption function defined in the Denoising autoencoder recipe to corrupt the images and, finally, create a StackedAutoEncoder and train it (a minimal sketch of such a corruption function is given after this step's code):
Xtrain = trX.astype(np.float32)
Xtrain_noisy = corruption(Xtrain).astype(np.float32)
Xtest = teX.astype(np.float32)
Xtest_noisy = corruption(Xtest).astype(np.float32)
_, m = Xtrain.shape

list1 = [m, 500, 50]  # Number of neurons in each layer, starting from the input layer
n_layers = len(list1) - 1
autoEncoder = StackedAutoEncoder(list1)

# Initialize all variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    autoEncoder.set_session(sess)
    err = autoEncoder.fit(Xtrain, Xtrain_noisy, n_layers, epochs=30)
    out = autoEncoder.reconstruct(Xtest_noisy[0:100], n_layers)
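The corruption function itself is not repeated in this recipe; if you do not have the Denoising autoencoder recipe at hand, a minimal masking-noise version (an assumption here, the original may differ in the noise type and level) could be:
def corruption(x, noise_factor=0.3):
    # Masking noise: randomly zero out a fraction (noise_factor) of the pixels.
    # Both the strategy and the default noise_factor are assumptions.
    mask = np.random.binomial(1, 1 - noise_factor, x.shape)
    return x * mask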
  9. The reconstruction error versus epochs, as the stacked autoencoder is fine-tuned, is plotted here. You can see that, thanks to pretraining, we already start from a very low reconstruction loss:
plt.plot(err)
plt.xlabel('epochs')
plt.ylabel('Fine Tuning Reconstruction Error')

The plot is as follows:

  10. Let's now check the performance of our network. Here are the denoised handwritten digits produced when the network is presented with the noisy test images:
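The plotting code for this figure is not shown in this excerpt; a minimal sketch that displays the first few noisy test images (top row) alongside their reconstructions from the out array computed above (bottom row) might look like this:
plt.figure(figsize=(20, 4))
for i in range(10):
    ax = plt.subplot(2, 10, i + 1)    # Noisy input
    plt.imshow(Xtest_noisy[i].reshape(28, 28), cmap='gray')
    ax.axis('off')
    ax = plt.subplot(2, 10, i + 11)   # Reconstruction
    plt.imshow(out[i].reshape(28, 28), cmap='gray')
    ax.axis('off')
plt.show()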