import numpy as np
import numpy.random as rng
import tensorflow as tf
dtype = tf.float32
class ConditionalGaussianMade:
    """
    Implements a conditional MADE, where each conditional density is modelled by a single Gaussian component.
    """
def __init__(self, n_parameters, n_data, n_hiddens, act_fun, output_order='sequential', mode='sequential', input_parameters=None, input_data=None, logpdf=None):
"""
Constructor.
:param n_inputs: number of (conditional) inputs
:param n_outputs: number of outputs
:param n_hiddens: list with number of hidden units for each hidden layer
:param act_fun: tensorflow activation function
:param output_order: order of outputs
:param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
:param input: tensorflow placeholder to serve as input; if None, a new placeholder is created
:param output: tensorflow placeholder to serve as output; if None, a new placeholder is created
"""
# save input arguments
self.n_parameters = n_parameters
self.n_data = n_data
self.n_hiddens = n_hiddens
self.act_fun = act_fun
self.mode = mode
# create network's parameters
degrees = self.create_degrees(output_order)
Ms, Mmp = self.create_masks(degrees)
Wx, Ws, bs, Wm, bm, Wp, bp = self.create_weights_conditional(None)
self.parms = [Wx] + Ws + bs + [Wm, bm, Wp, bp]
self.output_order = degrees[0]
# activation function
f = self.act_fun
# input matrices
self.parameters = tf.placeholder(dtype=dtype,shape=[None,n_parameters],name='parameters') if input_parameters is None else input_parameters
self.data = tf.placeholder(dtype=dtype,shape=[None,n_data],name='data') if input_data is None else input_data
self.logpdf = tf.placeholder(dtype=dtype,shape=[None,1],name='logpdf') if logpdf is None else logpdf
# feedforward propagation
h = f(tf.matmul(self.parameters, Wx) + tf.matmul(self.data, Ms[0] * Ws[0]) + bs[0],name='h1')
for l, (M, W, b) in enumerate(zip(Ms[1:], Ws[1:], bs[1:])):
h = f(tf.matmul(h, M * W) + b,name='h'+str(l + 2))
# output means
self.m = tf.add(tf.matmul(h, Mmp * Wm), bm, name='m')
# output log precisions
self.logp = tf.add(tf.matmul(h, Mmp * Wp), bp, name='logp')
        # random numbers driving the MADE: the transform of the data to standard-normal space
self.u = tf.exp(0.5 * self.logp) * (self.data - self.m)
# log likelihoods
self.L = tf.multiply(-0.5,self.n_data * np.log(2 * np.pi) + \
tf.reduce_sum(self.u ** 2 - self.logp, axis=1,keepdims=True),name='L')
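        # equivalently, per dimension: log p(x_i | x_<i, parameters) =
        #   -0.5 * (log(2*pi) - logp_i + exp(logp_i) * (x_i - m_i)**2),
        # i.e. each conditional is a Gaussian with mean m_i and precision exp(logp_i)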
# train objective
self.trn_loss = -tf.reduce_mean(self.L,name='trn_loss')
self.reg_loss = tf.losses.mean_squared_error(self.L, self.logpdf)
    def create_degrees(self, input_order):
        """
        Generates a degree for each hidden and input unit. A unit with degree d can only receive input from units with
        degree less than d.
        :param input_order: the order of the inputs; can be 'random', 'sequential', or an array specifying an explicit order
        :return: list of degrees
        """
degrees = []
# create degrees for inputs
if isinstance(input_order, str):
if input_order == 'random':
degrees_0 = np.arange(1, self.n_data + 1)
rng.shuffle(degrees_0)
elif input_order == 'sequential':
degrees_0 = np.arange(1, self.n_data + 1)
else:
                raise ValueError('invalid input order')
else:
input_order = np.array(input_order)
assert np.all(np.sort(input_order) == np.arange(1, self.n_data + 1)), 'invalid input order'
degrees_0 = input_order
degrees.append(degrees_0)
# create degrees for hiddens
if self.mode == 'random':
for N in self.n_hiddens:
min_prev_degree = min(np.min(degrees[-1]), self.n_data - 1)
degrees_l = rng.randint(min_prev_degree, self.n_data, N)
degrees.append(degrees_l)
elif self.mode == 'sequential':
for N in self.n_hiddens:
degrees_l = np.arange(N) % max(1, self.n_data - 1) + min(1, self.n_data - 1)
degrees.append(degrees_l)
else:
raise ValueError('invalid mode')
return degrees
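    # For example, with n_data = 3 and n_hiddens = [4] in 'sequential' mode, the input
    # degrees are [1, 2, 3] and the hidden degrees are [1, 2, 1, 2]: hidden degrees
    # cycle through 1..n_data-1 so every conditional can be represented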
    def create_masks(self, degrees):
        """
        Creates the binary masks that make the connectivity autoregressive.
        :param degrees: a list of degrees for every layer
        :return: list of all masks, as tensorflow constants
        """
Ms = []
for l, (d0, d1) in enumerate(zip(degrees[:-1], degrees[1:])):
M = d0[:, np.newaxis] <= d1
M = tf.constant(M, dtype=dtype, name='M' + str(l+1))
Ms.append(M)
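        # the output mask uses a strict inequality so that output i depends only on
        # inputs with strictly smaller degree, which enforces the autoregressive property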
Mmp = degrees[-1][:, np.newaxis] < degrees[0]
Mmp = tf.constant(Mmp, dtype=dtype, name='Mmp')
return Ms, Mmp
    def create_weights_conditional(self, n_comps):
        """
        Creates all learnable weight matrices and bias vectors.
        :param n_comps: number of Gaussian components per conditional; if None, a single component is used
        :return: weights and biases, as tensorflow variables
        """
Ws = []
bs = []
n_units = np.concatenate(([self.n_data], self.n_hiddens))
Wx = tf.get_variable("Wx", [self.n_parameters, self.n_hiddens[0]], initializer = tf.random_normal_initializer(0., np.sqrt(1./(self.n_parameters + 1))) )
for l, (N0, N1) in enumerate(zip(n_units[:-1], n_units[1:])):
W = tf.get_variable("W"+str(l), [N0, N1], initializer = tf.random_normal_initializer(0., np.sqrt(1./(1+N0))) )
b = tf.get_variable("b"+str(l), [1, N1], initializer = tf.constant_initializer(0.0) )
Ws.append(W)
bs.append(b)
if n_comps is None:
Wm = tf.get_variable("Wm", [n_units[-1], self.n_data], initializer = tf.random_normal_initializer(0., np.sqrt(1./(n_units[-1] + 1))) )
Wp = tf.get_variable("Wp", [n_units[-1], self.n_data], initializer = tf.random_normal_initializer(0., np.sqrt(1./(n_units[-1] + 1))) )
bm = tf.get_variable("bm", [1, self.n_data], initializer = tf.constant_initializer(0.0) )
bp = tf.get_variable("bp", [1, self.n_data], initializer = tf.constant_initializer(0.0) )
return Wx, Ws, bs, Wm, bm, Wp, bp
else:
Wm = tf.get_variable("Wm", [n_units[-1], self.n_data, n_comps], initializer = tf.random_normal_initializer(0., np.sqrt(1./(n_units[-1] + 1))) )
Wp = tf.get_variable("Wp", [n_units[-1], self.n_data, n_comps], initializer = tf.random_normal_initializer(0., np.sqrt(1./(n_units[-1] + 1))) )
Wa = tf.get_variable("Wa", [n_units[-1], self.n_data, n_comps], initializer = tf.random_normal_initializer(0., np.sqrt(1./(n_units[-1] + 1))) )
bm = tf.get_variable("bm", [self.n_data, n_comps], initializer = tf.random_normal_initializer() )
bp = tf.get_variable("bp", [self.n_data, n_comps], initializer = tf.random_normal_initializer() )
ba = tf.get_variable("ba", [self.n_data, n_comps], initializer = tf.random_normal_initializer() )
return Wx, Ws, bs, Wm, bm, Wp, bp, Wa, ba
    def eval(self, xy, sess, log=True):
"""
Evaluate log probabilities for given input-output pairs.
:param xy: a pair (x, y) where x rows are inputs and y rows are outputs
:param sess: tensorflow session where the graph is run
:param log: whether to return probabilities in the log domain
:return: log probabilities: log p(y|x)
"""
x, y = xy
lprob = sess.run(self.L,feed_dict={self.parameters:x,self.data:y})
return lprob if log else np.exp(lprob)
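# A minimal usage sketch (not part of the original module): build a small Gaussian
# MADE and evaluate log p(data | parameters) for random inputs. The helper name
# `_example_conditional_gaussian_made` and all sizes here are illustrative assumptions.
def _example_conditional_gaussian_made():
    tf.reset_default_graph()
    made = ConditionalGaussianMade(n_parameters=2, n_data=3, n_hiddens=[20, 20], act_fun=tf.tanh)
    x = rng.randn(5, 2).astype(np.float32)  # conditioning parameters
    y = rng.randn(5, 3).astype(np.float32)  # data realizations
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return made.eval((x, y), sess)  # log p(y|x), shape (5, 1)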
class ConditionalMaskedAutoregressiveFlow:
"""
Conditional Masked Autoregressive Flow.
"""
def __init__(self, n_parameters, n_data, n_hiddens, act_fun, n_mades,
output_order='sequential', mode='sequential', input_parameters=None, input_data=None, logpdf=None, index=1):
"""
Constructor.
:param n_parameters: number of (conditional) inputs
:param n_data: number of outputs
:param n_hiddens: list with number of hidden units for each hidden layer
:param act_fun: tensorflow activation function
        :param n_mades: number of MADEs in the flow
        :param output_order: order of the outputs of the last MADE
:param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
:param input_parameters: tensorflow placeholder to serve as input for the parameters part of the training data; if None, a new placeholder is created
        :param input_data: tensorflow placeholder to serve as input for the data-realizations part of the training data; if None, a new placeholder is created
        :param logpdf: tensorflow placeholder for target log probabilities used by the regression loss; if None, a new placeholder is created
        :param index: index of the NDE; crucial when using ensembles of NDEs to keep their scopes separate
        """
# save input arguments
self.n_parameters = n_parameters
self.n_data = n_data
self.n_hiddens = n_hiddens
self.act_fun = act_fun
self.n_mades = n_mades
self.mode = mode
self.parameters = tf.placeholder(dtype=dtype,shape=[None,n_parameters],name='parameters') if input_parameters is None else input_parameters
self.data = tf.placeholder(dtype=dtype,shape=[None,n_data],name='data') if input_data is None else input_data
self.logpdf = tf.placeholder(dtype=dtype,shape=[None,1],name='logpdf') if logpdf is None else logpdf
self.parms = []
self.mades = []
self.bns = []
self.u = self.data
self.logdet_dudy = 0.0
for i in range(n_mades):
            # create a new MADE
with tf.variable_scope('nde_' + str(index) + '_made_' + str(i + 1)):
made = ConditionalGaussianMade(n_parameters, n_data, n_hiddens, act_fun,
output_order, mode, self.parameters, self.u)
self.mades.append(made)
self.parms += made.parms
            output_order = output_order if output_order == 'random' else made.output_order[::-1]
# inverse autoregressive transform
self.u = made.u
self.logdet_dudy += 0.5 * tf.reduce_sum(made.logp, axis=1,keepdims=True)
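            # accumulate the log Jacobian determinant: du/d(data) is triangular with
            # diagonal exp(0.5 * logp), so each MADE contributes 0.5 * sum(logp)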
self.output_order = self.mades[0].output_order
# log likelihoods
self.L = tf.add(-0.5 * n_data * np.log(2 * np.pi) - 0.5 * tf.reduce_sum(self.u ** 2, axis=1,keepdims=True), self.logdet_dudy,name='L')
# train objective
self.trn_loss = -tf.reduce_mean(self.L,name='trn_loss')
self.reg_loss = tf.losses.mean_squared_error(self.L, self.logpdf)
    def eval(self, xy, sess, log=True):
"""
Evaluate log probabilities for given input-output pairs.
:param xy: a pair (x, y) where x rows are inputs and y rows are outputs
:param sess: tensorflow session where the graph is run
:param log: whether to return probabilities in the log domain
:return: log probabilities: log p(y|x)
"""
x, y = xy
lprob = sess.run(self.L,feed_dict={self.parameters:x,self.data:y})
return lprob if log else np.exp(lprob)
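# A minimal training sketch (not part of the original module): fit a conditional MAF
# by minimizing trn_loss with Adam. The toy data, learning rate, iteration count, and
# the helper name `_example_train_maf` are illustrative assumptions.
def _example_train_maf():
    tf.reset_default_graph()
    maf = ConditionalMaskedAutoregressiveFlow(n_parameters=2, n_data=3, n_hiddens=[20, 20],
                                              act_fun=tf.tanh, n_mades=3)
    step = tf.train.AdamOptimizer(1e-3).minimize(maf.trn_loss)
    x = rng.randn(100, 2).astype(np.float32)
    y = rng.randn(100, 3).astype(np.float32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(200):  # a few gradient steps on the negative log likelihood
            sess.run(step, feed_dict={maf.parameters: x, maf.data: y})
        return maf.eval((x, y), sess)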
class MixtureDensityNetwork:
    """
    Implements a Mixture Density Network for modelling p(y|x) as a mixture of Gaussians with full covariances.
    """
def __init__(self, n_parameters, n_data, n_components = 3, n_hidden=[50,50], activations=[tf.tanh, tf.tanh],
input_parameters=None, input_data=None, logpdf=None, index=1):
"""
Constructor.
:param n_parameters: number of (conditional) inputs
:param n_data: number of outputs (ie dimensionality of distribution you're parameterizing)
:param n_hiddens: list with number of hidden units for each hidden layer
:param activations: tensorflow activation functions for each hidden layer
:param input: tensorflow placeholder to serve as input; if None, a new placeholder is created
:param output: tensorflow placeholder to serve as output; if None, a new placeholder is created
"""
# save input arguments
self.n_parameters = n_parameters
self.n_data = n_data
self.M = n_components
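        # number of network outputs: M components, each with n_data means,
        # n_data*(n_data+1)/2 Cholesky entries, and one mixture weight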
self.N = int((self.n_data + self.n_data * (self.n_data + 1) / 2 + 1)*self.M)
self.n_hidden = n_hidden
self.activations = activations
self.parameters = tf.placeholder(dtype=dtype,shape=[None,self.n_parameters],name='parameters') if input_parameters is None else input_parameters
self.data = tf.placeholder(dtype=dtype,shape=[None,self.n_data],name='data') if input_data is None else input_data
self.logpdf = tf.placeholder(dtype=dtype,shape=[None,1],name='logpdf') if logpdf is None else logpdf
        # Build the layers of the network: hidden layers with their activations,
        # followed by a linear output layer that holds the raw mixture parameters
        self.layers = [self.parameters]
        self.weights = []
        self.biases = []
        n_units = [self.n_parameters] + list(self.n_hidden) + [self.N]
        for i in range(len(n_units) - 1):
            with tf.variable_scope('nde_' + str(index) + '_layer_' + str(i + 1)):
                self.weights.append(tf.get_variable("weights", [n_units[i], n_units[i + 1]], initializer = tf.random_normal_initializer(0., np.sqrt(2./n_units[i]))))
                self.biases.append(tf.get_variable("biases", [n_units[i + 1]], initializer = tf.constant_initializer(0.0)))
            if i < len(n_units) - 2:
                self.layers.append(self.activations[i](tf.add(tf.matmul(self.layers[-1], self.weights[-1]), self.biases[-1])))
            else:
                self.layers.append(tf.add(tf.matmul(self.layers[-1], self.weights[-1]), self.biases[-1]))
# Map the output layer to mixture model parameters
self.mu, self.sigma, self.alpha = tf.split(self.layers[-1], [self.M * self.n_data, self.M * self.n_data * (self.n_data + 1) // 2, self.M], 1)
self.mu = tf.reshape(self.mu, (-1, self.M, self.n_data))
self.sigma = tf.reshape(self.sigma, (-1, self.M, self.n_data * (self.n_data + 1) // 2))
self.alpha = tf.nn.softmax(self.alpha)
self.Sigma = tf.contrib.distributions.fill_triangular(self.sigma)
self.Sigma = self.Sigma - tf.linalg.diag(tf.linalg.diag_part(self.Sigma)) + tf.linalg.diag(tf.exp(tf.linalg.diag_part(self.Sigma)))
self.det = tf.reduce_prod(tf.linalg.diag_part(self.Sigma), axis=-1)
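        # self.Sigma is a triangular square-root factor of each component's precision
        # matrix (diagonal exponentiated for positivity), so self.det is the square
        # root of that component's precision determinant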
self.mu = tf.identity(self.mu, name = "mu")
self.Sigma = tf.identity(self.Sigma, name = "Sigma")
self.alpha = tf.identity(self.alpha, name = "alpha")
self.det = tf.identity(self.det, name = "det")
        # Log likelihoods: log of the summed per-component densities,
        # alpha_k * det_k * exp(-0.5 * ||Sigma_k (data - mu_k)||^2) / (2 pi)^(n_data/2);
        # the 1e-37 offset guards against taking the log of zero
        residuals = tf.einsum("ijlk,ijk->ijl", self.Sigma, tf.subtract(tf.expand_dims(self.data, 1), self.mu))
        log_components = -0.5 * tf.reduce_sum(tf.square(residuals), 2) + tf.log(self.alpha) + tf.log(self.det) - self.n_data * np.log(2. * np.pi) / 2.
        self.L = tf.log(tf.reduce_sum(tf.exp(log_components), 1, keepdims=True) + 1e-37, name = "L")
# Objective loss function
self.trn_loss = -tf.reduce_mean(self.L, name = "trn_loss")
self.reg_loss = tf.losses.mean_squared_error(self.L, self.logpdf)
    def eval(self, xy, sess, log=True):
"""
Evaluate log probabilities for given input-output pairs.
:param xy: a pair (x, y) where x rows are inputs and y rows are outputs
:param sess: tensorflow session where the graph is run
:param log: whether to return probabilities in the log domain
:return: log probabilities: log p(y|x)
"""
x, y = xy
lprob = sess.run(self.L,feed_dict={self.parameters:x,self.data:y})
return lprob if log else np.exp(lprob)
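# A minimal usage sketch (not part of the original module): build a three-component
# MDN and evaluate log p(data | parameters). The sizes and the helper name
# `_example_mixture_density_network` are illustrative assumptions; activations must
# supply one function per hidden layer.
def _example_mixture_density_network():
    tf.reset_default_graph()
    mdn = MixtureDensityNetwork(n_parameters=2, n_data=3, n_components=3,
                                n_hidden=[50, 50], activations=[tf.tanh, tf.tanh])
    x = rng.randn(5, 2).astype(np.float32)
    y = rng.randn(5, 3).astype(np.float32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return mdn.eval((x, y), sess)  # log p(y|x) under the mixture, shape (5, 1)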