Source code for simplenet.simplenet

"""simplenet.simplenet :: Define SimpleNet class and common functions."""

from typing import Callable, List, Sequence, Tuple, Union  # noqa

import numpy as np


DataArray = Union[Sequence[int], Sequence[float], np.ndarray]


def sigmoid(arr: np.ndarray, der: bool = False) -> np.ndarray:
    r"""Calculate the sigmoid activation function.

    .. math:: \sigma(x) = \frac{1}{1 + e^{-x}}

    Derivative:

    .. math:: \sigma(x) \cdot (1 - \sigma(x))

    Args:
        arr: Input array of weighted sums
        der: Whether to return the derivative instead of the activation

    Returns:
        Array of outputs from 0 to 1
    """
    activations = 1 / (1 + np.exp(-arr))
    if der is True:
        return activations * (1 - activations)
    return activations

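# Illustrative sketch (not part of the original module): a quick numeric
# check of ``sigmoid``. The helper name below is hypothetical and exists
# only for demonstration.
def _sigmoid_example() -> None:
    """Show sigmoid values and derivatives for a few inputs."""
    x = np.array([[-2.0, 0.0, 2.0]])
    acts = sigmoid(x)             # approx [[0.119, 0.5, 0.881]]
    grads = sigmoid(x, der=True)  # approx [[0.105, 0.25, 0.105]]
    assert np.isclose(acts[0, 1], 0.5)
    assert np.isclose(grads[0, 1], 0.25)  # derivative peaks at x == 0
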
def softmax(arr: np.ndarray) -> np.ndarray:
    r"""Calculate the softmax activation function.

    This implementation uses a "stable softmax" that subtracts the
    row-wise maximum before exponentiating, which does not change the
    results but avoids overflow.

    .. math:: \frac{e^{x_i}}{\sum_j e^{x_j}}

    Args:
        arr: Input array of weighted sums

    Returns:
        Array of outputs from 0 to 1, with each row summing to 1
    """
    exps = np.exp(arr - np.max(arr, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

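# Illustrative sketch (not part of the original module): the "stable
# softmax" shift. Subtracting the row-wise maximum avoids overflow for
# large inputs without changing the result. Hypothetical helper for
# demonstration only.
def _softmax_example() -> None:
    """Show that softmax is invariant to a constant shift per row."""
    logits = np.array([[1.0, 2.0, 3.0],
                       [1001.0, 1002.0, 1003.0]])  # naive exp() would overflow
    probs = softmax(logits)
    assert np.allclose(probs.sum(axis=1), 1.0)
    # The rows differ only by a constant shift, so their softmax is identical.
    assert np.allclose(probs[0], probs[1])
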
def neg_log_likelihood(y_hat: np.ndarray, targets: np.ndarray,
                       der: bool = False) -> float:
    r"""Calculate the negative log likelihood loss.

    This is also known as the binary cross-entropy loss.

    .. math::
        -\frac{1}{m} \sum \left( y \log \hat{y}
        + (1 - y) \log (1 - \hat{y}) \right)

    Args:
        y_hat: Array of predicted values from 0 to 1
        targets: Array of true values
        der: Whether to return the derivative instead of the loss

    Returns:
        Mean loss for the sample
    """
    m = y_hat.shape[0]
    if der is True:
        return (1 / m) * (y_hat - targets)
    return -(1 / m) * np.sum(
        targets * np.log(y_hat) + (1 - targets) * np.log(1 - y_hat)
    )

def cross_entropy(y_hat: np.ndarray, targets: np.ndarray,
                  der: bool = False) -> float:
    r"""Calculate the categorical cross-entropy loss.

    .. math:: -\frac{1}{m} \sum y \log \hat{y}

    Args:
        y_hat: Array of predicted values from 0 to 1
        targets: Array of true values
        der: Whether to return the derivative instead of the loss

    Returns:
        Mean loss for the sample
    """
    m = y_hat.shape[0]
    if der is True:
        return (1 / m) * (y_hat - targets)
    return -(1 / m) * np.sum(targets * np.log(y_hat))

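# Illustrative sketch (not part of the original module): both loss
# functions return the mean loss over the m rows of ``y_hat``, and with
# ``der=True`` they return (y_hat - targets) / m. Hypothetical helper for
# demonstration only.
def _loss_example() -> None:
    """Check neg_log_likelihood against a hand-computed value."""
    y_hat = np.array([[0.9], [0.2]])
    targets = np.array([[1.0], [0.0]])
    loss = neg_log_likelihood(y_hat, targets)
    # -(1/2) * (log(0.9) + log(0.8)) is roughly 0.164
    assert np.isclose(loss, -(np.log(0.9) + np.log(0.8)) / 2)
    grad = neg_log_likelihood(y_hat, targets, der=True)
    assert np.allclose(grad, (y_hat - targets) / 2)
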
def relu(arr: np.ndarray, der: bool = False) -> np.ndarray:
    """Calculate the ReLU activation function.

    Args:
        arr: Input array
        der: Whether to calculate the derivative

    Returns:
        Array with all negative values replaced by 0
    """
    if der is True:
        return np.where(arr <= 0, 0, 1)
    return np.maximum(arr, 0)

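# Illustrative sketch (not part of the original module): ``relu`` zeroes
# negative inputs, and its derivative is 0 where the input is <= 0 and 1
# elsewhere. Hypothetical helper for demonstration only.
def _relu_example() -> None:
    """Show relu outputs and its derivative."""
    x = np.array([[-1.5, 0.0, 2.0]])
    assert np.allclose(relu(x), [[0.0, 0.0, 2.0]])
    assert np.allclose(relu(x, der=True), [[0, 0, 1]])
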
class SimpleNet:
    """Simple example of a multilayer perceptron."""

    def __init__(
            self,
            hidden_layer_sizes: Sequence[int],
            input_shape: Tuple[int, int],
            output_shape: Tuple[int, int],
            activation_function: Callable[..., np.ndarray] = sigmoid,
            output_activation: Callable[..., np.ndarray] = sigmoid,
            loss_function: Callable[..., float] = neg_log_likelihood,
            learning_rate: float = 1.,
            dtype: str = 'float32',
            seed: int = None,
    ) -> None:
        """Initialize the MLP.

        Args:
            hidden_layer_sizes: Number of neurons in each hidden layer
            input_shape: Shape of inputs (m x n), use `None` for unknown m
            output_shape: Shape of outputs (m x o), use `None` for unknown m
            activation_function: Activation function for all layers prior
                to output
            output_activation: Activation function for output layer
            loss_function: Loss function used to train the network
            learning_rate: Learning rate
            dtype: Data type for floats (e.g. np.float32 vs np.float64)
            seed: Optional random seed for consistent outputs (for debugging)
        """
        self.dtype = dtype
        np.random.seed(seed=seed)
        layer_sizes = ([input_shape[1]]
                       + list(hidden_layer_sizes)
                       + [output_shape[1]])
        # Initialize weights uniformly, scaled by the combined fan-in/fan-out
        self.weights = [
            np.random.uniform(
                size=(layer_size, next_layer_size),
                low=-((2 / (layer_size + next_layer_size)) ** 0.5),
                high=((2 / (layer_size + next_layer_size)) ** 0.5),
            ).astype(self.dtype)
            for layer_size, next_layer_size
            in zip(layer_sizes, layer_sizes[1:])
        ]
        self.zs = [np.full((size, 1), np.nan, dtype=self.dtype)
                   for size in layer_sizes[1:]]
        self.outputs = [z.copy() for z in self.zs]
        self.biases = [np.zeros((1, layer_size), dtype=self.dtype)
                       for layer_size in layer_sizes[1:]]
        self.activation_function = activation_function
        self.learning_rate = learning_rate
        self.output_activation = output_activation
        self.loss_function = loss_function

    def _forward(self, inputs: np.ndarray) -> None:
        """Perform the forward pass.

        Args:
            inputs: Array of input values
        """
        self.zs[0] = np.dot(inputs, self.weights[0]) + self.biases[0]
        self.outputs[0] = self.activation_function(self.zs[0])
        for layer_num in range(1, len(self.weights)):
            self.zs[layer_num] = (
                np.dot(self.outputs[layer_num - 1], self.weights[layer_num])
                + self.biases[layer_num])
            if layer_num < len(self.weights) - 1:
                self.outputs[layer_num] = self.activation_function(
                    self.zs[layer_num])
            else:
                # Final layer uses the (possibly different) output activation
                self.outputs[layer_num] = self.output_activation(
                    self.zs[layer_num])

    def _backprop(self, inputs: np.ndarray, targets: np.ndarray) -> None:
        """Calculate gradients and perform the backward pass.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
        """
        y_hat = self.outputs[-1]
        self.err = self.loss_function(y_hat=y_hat, targets=targets)
        dws = []  # type: List[np.ndarray]
        dbs = []  # type: List[np.ndarray]
        # dzs[0] starts as the gradient of the loss w.r.t. the output layer
        dzs = [self.loss_function(y_hat=y_hat, targets=targets, der=True)]
        for output, weight, z in zip(self.outputs[-2::-1],
                                     self.weights[::-1],
                                     self.zs[-2::-1]):
            dws.insert(0, np.dot(output.T, dzs[0]))
            dbs.insert(0, np.sum(dzs[0], axis=0, keepdims=True))
            dzs.insert(0, np.dot(dzs[0], weight.T)
                       * self.activation_function(z, der=True))
        dws.insert(0, np.dot(inputs.T, dzs[0]))
        dbs.insert(0, np.sum(dzs[0], axis=0, keepdims=True))
        for idx, (dw, db) in enumerate(zip(dws, dbs)):
            self.weights[idx] -= self.learning_rate * dw
            self.biases[idx] -= self.learning_rate * db

    def learn(self, inputs: DataArray, targets: DataArray) -> None:
        """Perform a forward and backward pass, updating weights.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
        """
        inputs = np.array(inputs, dtype=self.dtype)
        targets = np.array(targets, dtype=self.dtype)
        self._forward(inputs=inputs)
        self._backprop(inputs=inputs, targets=targets)

    def predict(self, inputs: DataArray) -> np.ndarray:
        """Use existing weights to predict outputs for given inputs.

        Note: this method does *not* update weights.

        Args:
            inputs: Array of inputs for which to make predictions

        Returns:
            Array of predictions
        """
        inputs = np.array(inputs, dtype=self.dtype)
        zs = [z.copy() for z in self.zs]
        outputs = [output.copy() for output in self.outputs]
        zs[0] = np.dot(inputs, self.weights[0]) + self.biases[0]
        outputs[0] = self.activation_function(zs[0])
        for layer_num in range(1, len(self.weights)):
            zs[layer_num] = (
                np.dot(outputs[layer_num - 1], self.weights[layer_num])
                + self.biases[layer_num])
            if layer_num < len(self.weights) - 1:
                outputs[layer_num] = self.activation_function(zs[layer_num])
            else:
                outputs[layer_num] = self.output_activation(zs[layer_num])
        return outputs[-1]

    def validate(self, inputs: np.ndarray, targets: np.ndarray,
                 epsilon: float = 1e-7) -> bool:
        """Use gradient checking to validate backpropagation.

        This method uses a naive implementation of gradient checking to
        try to verify the analytic gradients.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
            epsilon: Small value by which to perturb values for gradient
                checking

        Returns:
            Boolean reflecting whether or not the gradients seem to match
        """
        targets_arr = np.array(targets, dtype=self.dtype)
        weight_grads = []  # type: List[List[List[float]]]
        bias_grads = []  # type: List[List[List[float]]]
        backup_weights = [weight.copy() for weight in self.weights]
        backup_biases = [bias.copy() for bias in self.biases]
        for layer_num, layer_weights in enumerate(self.weights):
            layer_weight_grads = []  # type: List[List[float]]
            layer_bias_grads = [[]]  # type: List[List[float]]
            for neuron_num, neuron_weights in enumerate(layer_weights):
                neuron_weight_grads = []
                for weight_num, weight in enumerate(neuron_weights):
                    self.weights[layer_num][neuron_num][weight_num] = \
                        weight + epsilon
                    outputs = self.predict(inputs)
                    cost_plus = self.loss_function(y_hat=outputs,
                                                   targets=targets_arr)
                    self.weights[layer_num][neuron_num][weight_num] = \
                        weight - epsilon
                    outputs = self.predict(inputs)
                    cost_minus = self.loss_function(y_hat=outputs,
                                                    targets=targets_arr)
                    self.weights = [backup_weight.copy()
                                    for backup_weight in backup_weights]
                    weight_grad = (cost_plus - cost_minus) / (2 * epsilon)
                    neuron_weight_grads.append(weight_grad)
                    # Biases are shape (1, len(next_layer)), and
                    # len(next_layer) == len(neuron_weights) so only set
                    # biases once per neuron, using the neuron's weight_num
                    # to index
                    if neuron_num == 0:
                        bias = self.biases[layer_num][0][weight_num]
                        self.biases[layer_num][0][weight_num] = bias + epsilon
                        outputs = self.predict(inputs)
                        cost_plus = self.loss_function(y_hat=outputs,
                                                       targets=targets_arr)
                        self.biases[layer_num][0][weight_num] = bias - epsilon
                        outputs = self.predict(inputs)
                        cost_minus = self.loss_function(y_hat=outputs,
                                                        targets=targets_arr)
                        self.biases = [backup_bias.copy()
                                       for backup_bias in backup_biases]
                        neuron_bias_grad = (cost_plus - cost_minus) / \
                            (2 * epsilon)
                        layer_bias_grads[0].append(neuron_bias_grad)
                layer_weight_grads.append(neuron_weight_grads)
            weight_grads.append(layer_weight_grads)
            bias_grads.append(layer_bias_grads)

        # One learning step gives the analytic gradients as
        # (before - after) / learning_rate; parameters are restored after.
        self.learn(inputs=inputs, targets=targets)
        weight_deltas = []
        bias_deltas = []
        for weight_before, weight_after, bias_before, bias_after in \
                zip(backup_weights, self.weights,
                    backup_biases, self.biases):
            weight_deltas.append(
                (weight_before - weight_after) / self.learning_rate)
            bias_deltas.append(
                (bias_before - bias_after) / self.learning_rate)
        self.weights = [backup_weight.copy()
                        for backup_weight in backup_weights]
        self.biases = [backup_bias.copy()
                       for backup_bias in backup_biases]

        pairs = {
            "weight": (weight_grads, weight_deltas),
            "bias": (bias_grads, bias_deltas),
        }
        for k, pair in pairs.items():
            for idx, (calculated, analytic) in enumerate(zip(*pair)):
                if not np.allclose(calculated, analytic):
                    width = 25
                    print("Wrong {} gradient suspected around layer {}."
                          .format(k, idx))
                    print("{:^{width}}{:^{width}}{:^{width}}".format(
                        "calculated", "analytic", "diff", width=width))
                    for c, a in zip(np.array(calculated).reshape(-1),
                                    analytic.reshape(-1)):
                        print("{:^{width}}{:^{width}}{:^{width}}".format(
                            c, a, c - a, width=width))
                    return False
            print("All {} gradients check out.".format(k))
        return True

    def export_model(self, filename: str) -> None:
        """Export the learned biases and weights to a file.

        Saves each weight and bias in order with an index and a prefix of
        `W` or `b` to ensure it can be restored in the proper order.

        Args:
            filename: Filename for the saved file.
        """
        pad = len(str(len(self.weights)))
        weights = {"W{:0{pad}}".format(idx, pad=pad): self.weights[idx]
                   for idx in range(len(self.weights))}
        # Use the biases (not the weights) for the `b`-prefixed entries
        biases = {"b{:0{pad}}".format(idx, pad=pad): self.biases[idx]
                  for idx in range(len(self.biases))}
        np.savez(filename, **weights, **biases)

    def import_model(self, filename: str) -> None:
        """Import learned biases and weights from a file.

        Args:
            filename: Name of file from which to import
        """
        model = np.load(filename)
        self.weights = [model[k] for k in sorted(model.keys())
                        if k.startswith("W")]
        self.biases = [model[k] for k in sorted(model.keys())
                       if k.startswith("b")]

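# Illustrative usage sketch (not part of the original module), assuming a
# tiny XOR dataset: build a 2-4-1 network, check the analytic gradients
# with ``validate``, train with ``learn``, inspect ``predict``, and
# round-trip the parameters through ``export_model``/``import_model``.
# The hyperparameters and the filename below are hypothetical choices.
if __name__ == "__main__":
    xor_inputs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    xor_targets = [[0], [1], [1], [0]]

    # float64 keeps the epsilon=1e-7 perturbation in validate() meaningful.
    net = SimpleNet(
        hidden_layer_sizes=[4],
        input_shape=(None, 2),
        output_shape=(None, 1),
        learning_rate=1.,
        dtype='float64',
        seed=42,
    )

    # Gradient check (numerical vs. analytic) before training.
    ok = net.validate(np.array(xor_inputs, dtype='float64'),
                      np.array(xor_targets, dtype='float64'))
    print("Gradient check passed:", ok)

    # Full-batch gradient descent; net.err holds the most recent loss.
    for epoch in range(5000):
        net.learn(inputs=xor_inputs, targets=xor_targets)
    print("Final loss:", net.err)
    print("Predictions:", net.predict(xor_inputs).round(3))

    # Save the learned parameters and restore them into a fresh network.
    net.export_model("xor_model.npz")  # hypothetical filename
    restored = SimpleNet(hidden_layer_sizes=[4],
                         input_shape=(None, 2),
                         output_shape=(None, 1),
                         dtype='float64')
    restored.import_model("xor_model.npz")
    assert np.allclose(net.predict(xor_inputs), restored.predict(xor_inputs))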