How to do it...

The code here is based on the paper Auto-Encoding Variational Bayes by Kingma and Welling (https://arxiv.org/pdf/1312.6114.pdf), and is adapted from the GitHub-hosted tutorial at https://jmetzen.github.io/2015-11-27/vae.html.

  1. The first step is, as always, importing the necessary modules. For this recipe we will require NumPy, Matplotlib, and TensorFlow:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
  1. Next, we define the VariationalAutoencoder class. The class __init__ method stores the hyperparameters, such as the learning rate and batch size, and defines the placeholder for the input and the weight and bias variables for the encoder and decoder networks. It also builds the computational graph according to the network architecture of the VAE. We initialize the weights using Xavier initialization in this recipe. Instead of defining our own method for Xavier initialization, we use TensorFlow's tf.contrib.layers.xavier_initializer() to do the task. Lastly, we define the loss (generation and latent) and the optimizer ops:
class VariationalAutoencoder(object):
    """Variational autoencoder built as a TensorFlow 1.x graph.

    The constructor wires up the whole graph (encoder, sampling step,
    decoder, loss, optimizer), opens an interactive session and
    initializes all variables.
    """

    def __init__(self, network_architecture, transfer_fct=tf.nn.softplus,
                 learning_rate=0.001, batch_size=100):
        """Build the graph and start a session.

        Args:
            network_architecture: dict unpacked as keyword arguments into
                _initialize_weights; it must provide n_hidden_recog_1,
                n_hidden_recog_2, n_hidden_gener_1, n_hidden_gener_2,
                n_input and n_z.
            transfer_fct: activation applied to the hidden layers.
            learning_rate: learning rate handed to the Adam optimizer.
            batch_size: stored on the instance as self.batch_size.
        """
        self.network_architecture = network_architecture
        self.transfer_fct = transfer_fct
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        # Placeholder for the input; the batch dimension is left open.
        self.x = tf.placeholder(
            tf.float32, [None, network_architecture["n_input"]])

        # Define weights and biases.
        network_weights = self._initialize_weights(**self.network_architecture)

        # Use the encoder network to determine the mean and (log) variance
        # of the Gaussian distribution in latent space.
        self.z_mean, self.z_log_sigma_sq = self._encoder_network(
            network_weights["weights_encoder"],
            network_weights["biases_encoder"])

        # Draw one noise sample per input row. The shape follows z_mean at
        # run time instead of being pinned to batch_size, so batches of any
        # size can be pushed through the graph.
        eps = tf.random_normal(tf.shape(self.z_mean), 0, 1, dtype=tf.float32)

        # z = mu + sigma * epsilon
        self.z = tf.add(
            self.z_mean,
            tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))

        # Use the decoder network to determine the mean of the Bernoulli
        # distribution of the reconstructed input.
        self.x_reconstr_mean = self._decoder_network(
            network_weights["weights_decoder"],
            network_weights["biases_decoder"])

        # Generation (reconstruction) loss: cross-entropy between the input
        # and its reconstruction, summed over the input dimensions. The
        # 1e-10 offset keeps tf.log away from log(0).
        generation_loss = -tf.reduce_sum(
            self.x * tf.log(1e-10 + self.x_reconstr_mean)
            + (1 - self.x) * tf.log(1e-10 + 1 - self.x_reconstr_mean), 1)

        # Latent loss: computed from z_mean and z_log_sigma_sq only, summed
        # over the latent dimensions.
        latent_loss = -0.5 * tf.reduce_sum(
            1 + self.z_log_sigma_sq
            - tf.square(self.z_mean)
            - tf.exp(self.z_log_sigma_sq), 1)

        # Average the per-example cost over the batch.
        self.cost = tf.reduce_mean(generation_loss + latent_loss)

        # Define the optimizer.
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.cost)

        # Initialize the TensorFlow variables and launch the session.
        init = tf.global_variables_initializer()
        self.sess = tf.InteractiveSession()
        self.sess.run(init)

# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def _initialize_weights(self, n_hidden_recog_1, n_hidden_recog_2,
                        n_hidden_gener_1, n_hidden_gener_2,
                        n_input, n_z):
    """Create every weight and bias variable of the encoder and decoder.

    Weight matrices use the Xavier initializer; bias vectors start at zero.

    Returns:
        dict with the keys 'weights_encoder', 'biases_encoder',
        'weights_decoder' and 'biases_decoder', each mapping layer names
        to tf.Variable objects.
    """
    xavier = tf.contrib.layers.xavier_initializer()

    def weight(n_in, n_out):
        # Xavier-initialized matrix of shape (n_in, n_out).
        return tf.Variable(xavier(shape=(n_in, n_out)))

    def bias(size):
        # Zero-initialized float32 vector of length `size`.
        return tf.Variable(tf.zeros([size], dtype=tf.float32))

    # Variables are created in the same order as the dictionary entries.
    encoder_weights = {
        'h1': weight(n_input, n_hidden_recog_1),
        'h2': weight(n_hidden_recog_1, n_hidden_recog_2),
        'out_mean': weight(n_hidden_recog_2, n_z),
        'out_log_sigma': weight(n_hidden_recog_2, n_z),
    }
    encoder_biases = {
        'b1': bias(n_hidden_recog_1),
        'b2': bias(n_hidden_recog_2),
        'out_mean': bias(n_z),
        'out_log_sigma': bias(n_z),
    }
    decoder_weights = {
        'h1': weight(n_z, n_hidden_gener_1),
        'h2': weight(n_hidden_gener_1, n_hidden_gener_2),
        'out_mean': weight(n_hidden_gener_2, n_input),
        'out_log_sigma': weight(n_hidden_gener_2, n_input),
    }
    decoder_biases = {
        'b1': bias(n_hidden_gener_1),
        'b2': bias(n_hidden_gener_2),
        'out_mean': bias(n_input),
        'out_log_sigma': bias(n_input),
    }

    return {
        'weights_encoder': encoder_weights,
        'biases_encoder': encoder_biases,
        'weights_decoder': decoder_weights,
        'biases_decoder': decoder_biases,
    }
  1. We build the encoder network and the decoder network. The first layer of the encoder network takes the input and generates a reduced latent representation of it. The second layer maps the input to a Gaussian distribution. The network learns these transformations:
# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def _encoder_network(self, weights, biases):
    """Build the probabilistic encoder (recognition network).

    Maps the input placeholder self.x onto the parameters of a normal
    distribution in latent space. The transformation is parametrized by
    the given variables and can be learned.

    Args:
        weights: dict with the keys 'h1', 'h2', 'out_mean' and
            'out_log_sigma'.
        biases: dict with the keys 'b1', 'b2', 'out_mean' and
            'out_log_sigma'.

    Returns:
        Tuple (z_mean, z_log_sigma_sq) of tensors.
    """
    layer_1 = self.transfer_fct(
        tf.add(tf.matmul(self.x, weights['h1']), biases['b1']))
    layer_2 = self.transfer_fct(
        tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))

    # Both output heads are linear (no activation) and share layer_2.
    z_mean = tf.add(tf.matmul(layer_2, weights['out_mean']),
                    biases['out_mean'])
    z_log_sigma_sq = tf.add(tf.matmul(layer_2, weights['out_log_sigma']),
                            biases['out_log_sigma'])
    return (z_mean, z_log_sigma_sq)

# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def _decoder_network(self, weights, biases):
    """Build the probabilistic decoder (generator network).

    Maps the latent tensor self.z onto the mean of a Bernoulli
    distribution in data space. The transformation is parametrized by
    the given variables and can be learned.

    Args:
        weights: dict; the keys 'h1', 'h2' and 'out_mean' are read.
        biases: dict; the keys 'b1', 'b2' and 'out_mean' are read.

    Returns:
        Tensor holding the sigmoid-activated reconstruction mean.
    """
    layer_1 = self.transfer_fct(
        tf.add(tf.matmul(self.z, weights['h1']), biases['b1']))
    layer_2 = self.transfer_fct(
        tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))

    # The sigmoid keeps every output inside (0, 1).
    x_reconstr_mean = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_2, weights['out_mean']), biases['out_mean']))
    return x_reconstr_mean
  1. The class VariationalAutoencoder also contains some helper functions to generate and reconstruct data, and to fit the VAE:
# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def fit(self, X):
    """Run one optimizer step on the mini-batch X and return its cost."""
    fetches = (self.optimizer, self.cost)
    # The optimizer op is fetched only for its side effect; its result is
    # discarded.
    _, batch_cost = self.sess.run(fetches, feed_dict={self.x: X})
    return batch_cost

# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def generate(self, z_mu=None):
    """Generate data by decoding points in latent space.

    Args:
        z_mu: array of latent points with one row per sample and n_z
            columns. If None, batch_size points are drawn from the
            standard normal prior in latent space.

    Returns:
        The result of evaluating self.x_reconstr_mean with self.z fed
        directly. This is the mean of the output distribution; sampling
        from it would be an alternative.
    """
    if z_mu is None:
        # self.z is two-dimensional (rows x n_z), so a flat vector of n_z
        # values cannot be fed into it; draw a full batch of rows instead.
        z_mu = np.random.normal(
            size=(self.batch_size, self.network_architecture["n_z"]))
    return self.sess.run(self.x_reconstr_mean,
                         feed_dict={self.z: z_mu})

# Method of the VariationalAutoencoder class; indent it one level inside the class body.
def reconstruct(self, X):
    """Use the VAE to reconstruct the given data.

    X is fed into the input placeholder and the reconstruction mean is
    evaluated. NOTE(review): depending on how the sampling noise is shaped
    in __init__, X may need to contain exactly batch_size rows - confirm.
    """
    feed = {self.x: X}
    return self.sess.run(self.x_reconstr_mean, feed_dict=feed)
  1. Once the VAE class is done, we define a train function, which creates a VAE class object and trains it on the given data:
def train(network_architecture, learning_rate=0.001,
          batch_size=100, training_epochs=10, display_step=5):
    """Create a VariationalAutoencoder and train it on MNIST mini-batches.

    Reads the module-level names `mnist` (data source) and `n_samples`
    (number of training examples), which must exist before this function
    is called.

    Args:
        network_architecture: layer-size dict passed to the VAE.
        learning_rate: learning rate passed to the VAE.
        batch_size: number of examples per training step.
        training_epochs: number of passes over the training data.
        display_step: the cost is printed every display_step epochs.

    Returns:
        The trained VariationalAutoencoder instance.
    """
    model = VariationalAutoencoder(network_architecture,
                                   learning_rate=learning_rate,
                                   batch_size=batch_size)

    for epoch in range(training_epochs):
        epoch_cost = 0.
        batches_per_epoch = int(n_samples / batch_size)

        for _ in range(batches_per_epoch):
            # Labels are not needed for an autoencoder.
            images, _labels = mnist.train.next_batch(batch_size)
            batch_cost = model.fit(images)
            # Weight each batch by its share of the training set.
            epoch_cost += batch_cost / n_samples * batch_size

        # Display logs per epoch step.
        if epoch % display_step == 0:
            epoch_label = '%04d' % (epoch+1)
            cost_label = "{:.9f}".format(epoch_cost)
            print("Epoch:", epoch_label, "cost=", cost_label)

    return model
  1. Let us now use the VAE class and train function. We use the VAE for our favorite MNIST dataset:
# Load the MNIST data through TensorFlow's tutorial helper module.
# The input_data script is available at:
# https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/tutorials/mnist/input_data.py

from tensorflow.examples.tutorials.mnist import input_data
# `mnist` and `n_samples` are read as module-level names by train(), so they
# must be defined before train() is called.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
n_samples = mnist.train.num_examples

  1. We define the network architecture and train the VAE on the MNIST dataset. In this case, we keep the latent space two-dimensional for simplicity:
# Layer sizes of the VAE; the keys match the keyword arguments of
# VariationalAutoencoder._initialize_weights.
network_architecture = dict(
    n_hidden_recog_1=500,  # 1st layer encoder neurons
    n_hidden_recog_2=500,  # 2nd layer encoder neurons
    n_hidden_gener_1=500,  # 1st layer decoder neurons
    n_hidden_gener_2=500,  # 2nd layer decoder neurons
    n_input=784,  # MNIST data input (img shape: 28*28)
    n_z=2,  # dimensionality of latent space
)
vae = train(network_architecture, training_epochs=75)
  1. Let us now see if the VAE really reconstructs the input or not. The output shows that digits are indeed reconstructed, and since we have used a 2D latent space, there is a significant blurring of the images:
# Reconstruct one batch of test images and show the first five next to
# their reconstructions.
# NOTE(review): 100 equals the default batch_size the VAE was trained with;
# the graph may require exactly that many rows - confirm.
x_sample = mnist.test.next_batch(100)[0]
x_reconstruct = vae.reconstruct(x_sample)

plt.figure(figsize=(8, 12))
for row in range(5):
    # Left column: original image; right column: its reconstruction.
    panels = (
        (x_sample[row], "Test input"),
        (x_reconstruct[row], "Reconstruction"),
    )
    for col, (image, caption) in enumerate(panels, start=1):
        plt.subplot(5, 2, 2*row + col)
        plt.imshow(image.reshape(28, 28), vmin=0, vmax=1, cmap="gray")
        plt.title(caption)
        plt.colorbar()
plt.tight_layout()

Following is the output of the preceding code:

An example of MNIST reconstructed characters
  1. The following are the samples of handwritten digits generated using the trained VAE:
# Decode a regular nx-by-ny grid of latent points and tile the resulting
# 28x28 images into one large canvas.
nx = ny = 20
x_values = np.linspace(-3, 3, nx)
y_values = np.linspace(-3, 3, ny)

canvas = np.empty((28*ny, 28*nx))
# Rows of the canvas follow the y grid and columns follow the x grid, so the
# tiling stays within bounds even when nx and ny differ.
for i, yi in enumerate(y_values):
    for j, xi in enumerate(x_values):
        # Repeat the point batch_size times so the fed array has a full
        # batch of rows; only the first decoded row is used.
        z_mu = np.array([[xi, yi]]*vae.batch_size)
        x_mean = vae.generate(z_mu)
        # Flip the row index so the largest y value ends up at the top.
        canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = x_mean[0].reshape(28, 28)

plt.figure(figsize=(8, 10))
plt.imshow(canvas, origin="upper", cmap="gray")
plt.tight_layout()

Following is the range of MNIST-like characters generated by the autoencoder:

A range of MNIST-like characters generated by the autoencoder
..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset