# -*- coding: utf-8 -*-
#
# Author: Taylor G Smith <taylor.smith@alkaline-ml.com>
#
# A simple transfer learning classifier. If you find yourself struggling
# to follow the derivation of the back-propagation, check out this great
# refresher on scalar & matrix calculus + differential equations.
# http://parrt.cs.usfca.edu/doc/matrix-calculus/index.html
from __future__ import absolute_import
import numpy as np
from packtml.neural_net.base import NeuralMixin, tanh
from packtml.base import BaseSimpleEstimator
from packtml.neural_net.mlp import NeuralNetClassifier, _calculate_loss

__all__ = [
    'TransferLearningClassifier'
]

try:
    xrange
except NameError:
    xrange = range


def _pretrained_forward_step(X, pt_weights, pt_biases):
    """Complete a forward step from the pre-trained model"""
    # progress through all the layers (the output was already trimmed off)
    for w, b in zip(pt_weights, pt_biases):
        X = tanh(X.dot(w) + b)
    return X
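
# For intuition only (hypothetical widths): if the frozen network's hidden
# layers have shapes producing widths (12, 25), the helper above maps an
# (n_samples, n_features) input to (n_samples, 12) and then to (n_samples, 25);
# those tanh activations are the features the new, trainable layers are fit on.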


class TransferLearningClassifier(BaseSimpleEstimator, NeuralMixin):
    """A transfer learning classifier.

    Create a multi-layer perceptron classifier that learns from a
    previously-trained network. No fine-tuning is performed: the layers
    taken from the pre-trained network remain frozen, and only the new
    layers are trained.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        The training array. Should be a numpy array or array-like structure
        with only finite values.

    y : array-like, shape=(n_samples,)
        The target vector.

    pretrained : NeuralNetClassifier, TransferLearningClassifier
        The pre-trained MLP. The transfer learner leverages the features
        extracted from the pre-trained network (the trained weights without
        the output layer) and uses them to transform the input data before
        training the new layers.

    hidden : iterable, optional (default=(25,))
        An iterable indicating the number of units per hidden layer.

    n_iter : int, optional (default=10)
        The default number of iterations to perform.

    regularization : float, optional (default=0.01)
        The l2 regularization penalty applied to the weights when
        computing the loss.

    learning_rate : float, optional (default=0.001)
        The rate at which we descend the gradient.

    random_state : int, None or RandomState, optional (default=42)
        The random state for initializing the weights matrices.
    """

    def __init__(self, X, y, pretrained, hidden=(25,), n_iter=10,
                 regularization=0.01, learning_rate=0.001, random_state=42):
        # initialize via the NN static method
        self.hidden = hidden
        self.random_state = random_state
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.regularization = regularization

        # this is the previous model
        self.model = pretrained

        # assert that it's a neural net or we'll break down later
        assert isinstance(pretrained, NeuralMixin), \
            "Pre-trained model must be a neural network!"

        # initialize weights, biases, etc. for THE TRAINABLE LAYERS ONLY!
        pt_w, pt_b = pretrained.export_weights_and_biases(output_layer=False)
        X, y, weights, biases = NeuralNetClassifier._init_weights_biases(
            X, y, hidden, random_state,
            # use as the last dim the column dimension of the last weights
            # (the ones BEFORE the output layer, that is)
            last_dim=pt_w[-1].shape[1])

        # we can train this in a similar fashion to the plain MLP we designed:
        # for each iteration, feed X through the network, compute the loss,
        # and back-propagate the error to correct the weights.
        train_loss = []
        for _ in xrange(n_iter):
            # first, pass the input data through the pre-trained model's
            # hidden layers. Do not pass it through the last layer, however,
            # since we don't want its output from the softmax layer.
            X_transform = _pretrained_forward_step(X, pt_w, pt_b)

            # NOW we complete a forward step on THIS model's
            # untrained weights/biases
            out, layer_results = NeuralNetClassifier._forward_step(
                X_transform, weights, biases)

            # compute the loss on the output
            loss = _calculate_loss(truth=y, preds=out, weights=pt_w + weights,
                                   l2=self.regularization)
            train_loss.append(loss)

            # now back-propagate to correct THIS MODEL's weights and biases
            # via gradient descent. NOTE we do NOT adjust the pre-trained
            # model's weights!!!
            NeuralNetClassifier._back_propagate(
                truth=y, probas=out, layer_results=layer_results,
                weights=weights, biases=biases,
                learning_rate=learning_rate,
                l2=self.regularization)

        # save the weights, biases
        self.weights = weights
        self.biases = biases
        self.train_loss = train_loss

    def predict(self, X):
        # compute the probabilities and then get the argmax for each row
        probas = self.predict_proba(X)

        # we want the argmaxes of each row
        return np.argmax(probas, axis=1)

    def predict_proba(self, X):
        # Compute a forward step with the pre-trained model first:
        pt_w, pt_b = self.model.export_weights_and_biases(output_layer=False)
        X_transform = _pretrained_forward_step(X, pt_w, pt_b)

        # and then complete a forward step with the trained weights and biases
        return NeuralNetClassifier._forward_step(
            X_transform, self.weights, self.biases)[0]

    def export_weights_and_biases(self, output_layer=True):
        # concatenate the frozen (pre-trained) layers with this model's
        # trainable layers so the full network can be exported or reused
        pt_weights, pt_biases = \
            self.model.export_weights_and_biases(output_layer=False)
        w = pt_weights + self.weights
        b = pt_biases + self.biases

        if output_layer:
            return w, b
        return w[:-1], b[:-1]
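

if __name__ == "__main__":
    # A minimal usage sketch (not part of the original module). It assumes
    # scikit-learn is available and that NeuralNetClassifier is fit in its
    # constructor with the same (X, y, hidden=..., n_iter=...) signature this
    # class uses; the dataset and layer sizes below are purely illustrative.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=500, n_features=20,
                               n_informative=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)

    # fit a "source" network whose hidden layers will be frozen
    base = NeuralNetClassifier(X_train, y_train, hidden=(32,),
                               n_iter=25, random_state=42)

    # transfer its learned representation and train one new hidden layer
    clf = TransferLearningClassifier(X_train, y_train, pretrained=base,
                                     hidden=(16,), n_iter=25, random_state=42)

    print("transfer learner test accuracy: %.3f"
          % np.mean(clf.predict(X_test) == y_test))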