We are now going to go through the facial expression recognition project that we have worked on in the past, but this time we will use TensorFlow as our framework of choice! We will be building a neural network with three hidden layers of 2000, 1000, and 500 units respectively. We can start with our imports.
from __future__ import print_function, division
from builtins import range
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
%matplotlib inline
"""----------------------- Function to get data -----------------------------"""
def getData(balance_ones=True):
    # images are 48x48 = 2304 size vectors
    Y = []
    X = []
    first = True
    for line in open('../../../data/fer/fer2013.csv'):
        if first:
            first = False  # skip the header row
        else:
            row = line.split(',')
            Y.append(int(row[0]))
            X.append([int(p) for p in row[1].split()])
    X, Y = np.array(X) / 255.0, np.array(Y)
    if balance_ones:
        # balance the 1 class
        X0, Y0 = X[Y != 1, :], Y[Y != 1]
        X1 = X[Y == 1, :]
        X1 = np.repeat(X1, 9, axis=0)
        X = np.vstack([X0, X1])
        Y = np.concatenate((Y0, [1] * len(X1)))
    return X, Y
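Before moving on, it can be worth sanity-checking what getData returns. The snippet below is a small illustrative check, not part of the original walkthrough, and it assumes fer2013.csv is available at the path used above. It simply prints the shapes and the per-class counts so you can confirm that class 1 has been oversampled by the balancing step.

import numpy as np

# Illustrative sanity check - assumes fer2013.csv exists at the path used in getData()
X, Y = getData()
print(X.shape, Y.shape)                 # X is (N, 2304), Y is (N,)
print(np.bincount(Y.astype(np.int64)))  # per-class counts; class 1 is repeated 9x by the balancing step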
""" --------- Creates indicator (N x K), from an input N x 1 y matrix --------"""
def y2indicator(y):
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K))
    for i in range(N):
        ind[i, y[i]] = 1
    return ind
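As a quick illustration (not in the original), here is what y2indicator produces for a toy label vector with three classes:

import numpy as np

y_toy = np.array([0, 2, 1, 2])
print(y2indicator(y_toy))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]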
""" ----------- Gives the error rate between targets and predictions ---------------- """
def error_rate(targets, predictions):
    return np.mean(targets != predictions)
""" Rectifier Linear Unit - an activation function that can be used in a neural network """
def relu(x):
    return x * (x > 0)
"""
Function to initialize a weight matrix and a bias. M1 is the input size, and M2 is the output size
W is a matrix of size M1 x M2, which is randomized initialy to a gaussian normal
We make the standard deviation of this the sqrt of size in + size out
The bias is initialized as zeros. Each is then turned into float 32s so that they can be used in
Theano and TensorFlow
"""
def init_weight_and_bias(M1, M2):
    W = np.random.randn(M1, M2) / np.sqrt(M1)
    b = np.zeros(M2)
    return W.astype(np.float32), b.astype(np.float32)
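A quick check (illustrative, not part of the original) confirms the shapes, dtypes, and scale of the initialization for a layer mapping our 2304-pixel input to 500 units:

import numpy as np

W, b = init_weight_and_bias(2304, 500)
print(W.shape, b.shape)   # (2304, 500) (500,)
print(W.dtype, b.dtype)   # float32 float32
print(W.std())            # roughly 1 / sqrt(2304) ~= 0.021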
class HiddenLayer(object):
    def __init__(self, M1, M2, an_id):
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        W, b = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))
        self.params = [self.W, self.b]

    def forward(self, X):
        return tf.nn.relu(tf.matmul(X, self.W) + self.b)
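To see the class in action on its own, here is a minimal sketch (not part of the original code) that pushes a few random vectors through a single HiddenLayer inside a TensorFlow session; the layer sizes here are arbitrary and chosen only for illustration.

import numpy as np
import tensorflow as tf

layer = HiddenLayer(2304, 8, an_id=0)           # arbitrary sizes, for illustration only
tfX = tf.placeholder(tf.float32, shape=(None, 2304))
out = layer.forward(tfX)                        # ReLU(X.dot(W) + b)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    sample = np.random.rand(5, 2304).astype(np.float32)
    print(session.run(out, feed_dict={tfX: sample}).shape)  # (5, 8)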
class ANN(object):
    def __init__(self, hidden_layer_sizes):
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False):
        K = len(set(Y))  # won't work later b/c we turn it into indicator

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        # Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)  # for calculating error rate
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params
        # set up tensorflow variables and expressions
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfT
            )
        ) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        costs.append(c)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
    def forward(self, X):
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        act = self.forward(X)
        return tf.argmax(act, 1)
def main():
    X, Y = getData()
    # X, Y = getBinaryData()
    model = ANN([2000, 1000, 500])
    model.fit(X, Y, show_fig=True)


if __name__ == '__main__':
    main()
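If you want to verify that the whole pipeline runs end to end before committing to the full 2000-1000-500 architecture (which trains slowly on a CPU), a quick smoke test with a smaller network and fewer epochs is one reasonable first step. This is only a sketch, not part of the original code, and the layer sizes and epoch count are arbitrary.

# Illustrative smoke test with a much smaller network and fewer epochs
X, Y = getData()
small_model = ANN([200, 100])
small_model.fit(X, Y, epochs=2, show_fig=False)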