# Source code for simplenet.simplenet

"""simplenet.simplenet :: Define SimpleNet class and common functions."""

from typing import Callable, List, Sequence, Tuple, Union  # noqa

import numpy as np


# Type alias for the array-like numeric inputs accepted by the public
# ``SimpleNet.learn`` / ``SimpleNet.predict`` methods, which convert them with
# ``np.array`` before use.
DataArray = Union[Sequence[int], Sequence[float], np.ndarray]


def sigmoid(arr: np.ndarray, der: bool = False) -> np.ndarray:
    r"""Calculate the sigmoid activation function or its derivative.

    .. math::
        \sigma(x) = \frac{1}{1 + e ^ {-x}}

    Derivative (expressed through the activation itself):

    .. math::
        \sigma(x) * (1 - \sigma(x))

    Args:
        arr: Input array of weighted sums
        der: If truthy, return the derivative evaluated at `arr` instead of
             the activation
    Returns:
        Array with the same shape as `arr`; activations lie between 0 and 1
    """
    # asarray lets plain sequences be negated; ndarrays pass through untouched
    activations = 1 / (1 + np.exp(-np.asarray(arr)))
    if der:
        return activations * (1 - activations)
    return activations


def softmax(arr: np.ndarray) -> np.ndarray:
    r"""Calculate the softmax activation function.

    This is the "stable softmax": the row maximum is subtracted from the
    exponents before exponentiating, which avoids overflow without changing
    the mathematical result.

    .. math::
        \frac{e^x}{\sum_{} {e^x}}

    Args:
        arr: Input array of weighted sums. The reduction runs along axis 1,
             so `arr` must have at least two dimensions (one sample per row).
    Returns:
        Array of the same shape as `arr` with values from 0 to 1; the values
        along axis 1 sum to 1
    """
    # Shifting by the per-row maximum keeps every exponent <= 0
    shifted = arr - np.max(arr, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)


def neg_log_likelihood(y_hat: np.ndarray, targets: np.ndarray,
                       der: bool = False) -> Union[float, np.ndarray]:
    r"""Calculate the negative log likelihood loss.

    Also known as the binary cross-entropy loss function.

    Args:
        y_hat: Array of predicted values from 0 to 1
        targets: Array of true values
        der: If truthy, return ``(y_hat - targets) / m`` instead of the loss,
             where ``m`` is the number of rows in `y_hat`
    Returns:
        Mean loss for the sample, or an array shaped like `y_hat` when `der`
        is truthy
    """
    y_hat = np.asarray(y_hat)
    targets = np.asarray(targets)
    if not np.issubdtype(y_hat.dtype, np.floating):
        y_hat = y_hat.astype(float)
    m = y_hat.shape[0]

    if der:
        return (1 / m) * (y_hat - targets)

    # Floor the log arguments at the smallest positive normal float so that a
    # saturated prediction (exactly 0 or 1) yields a large finite loss rather
    # than inf, and `0 * log(0)` yields 0 rather than nan. Arguments that are
    # already at or above that floor are left untouched.
    tiny = np.finfo(y_hat.dtype).tiny
    log_p = np.log(np.maximum(y_hat, tiny))
    log_not_p = np.log(np.maximum(1 - y_hat, tiny))
    return -(1 / m) * np.sum(targets * log_p + (1 - targets) * log_not_p)


def cross_entropy(y_hat: np.ndarray, targets: np.ndarray,
                  der: bool = False) -> Union[float, np.ndarray]:
    """Calculate the categorical cross entropy loss.

    Args:
        y_hat: Array of predicted values from 0 to 1
        targets: Array of true values
        der: If truthy, return ``(y_hat - targets) / m`` instead of the loss,
             where ``m`` is the number of rows in `y_hat`
    Returns:
        Mean loss for the sample, or an array shaped like `y_hat` when `der`
        is truthy
    """
    y_hat = np.asarray(y_hat)
    targets = np.asarray(targets)
    if not np.issubdtype(y_hat.dtype, np.floating):
        y_hat = y_hat.astype(float)
    m = y_hat.shape[0]

    if der:
        return (1 / m) * (y_hat - targets)

    # A predicted probability of exactly 0 would give log(0) = -inf, and
    # `0 * -inf` is nan even for classes whose target is 0. Flooring at the
    # smallest positive normal float keeps the loss finite and leaves every
    # prediction at or above that floor untouched.
    tiny = np.finfo(y_hat.dtype).tiny
    return -(1 / m) * np.sum(targets * np.log(np.maximum(y_hat, tiny)))


def relu(arr: np.ndarray, der: bool = False) -> np.ndarray:
    """Calculate the relu activation function or its derivative.

    Args:
        arr: Input array
        der: If truthy, return the derivative instead of the activation
    Returns:
        ``max(arr, 0)`` element-wise, or, when `der` is truthy, an integer
        array holding 0 where ``arr <= 0`` and 1 elsewhere
    """
    # asarray lets plain sequences be compared element-wise
    arr = np.asarray(arr)
    if der:
        return np.where(arr <= 0, 0, 1)
    return np.maximum(arr, 0)


class SimpleNet:
    """Simple example of a multilayer perceptron.

    The network is a stack of fully connected layers. ``weights[i]`` has shape
    ``(layer_sizes[i], layer_sizes[i + 1])`` and ``biases[i]`` has shape
    ``(1, layer_sizes[i + 1])``, so samples are expected as rows of the input
    array.
    """

    def __init__(
        self,
        hidden_layer_sizes: Sequence[int],
        input_shape: Tuple[int, int],
        output_shape: Tuple[int, int],
        activation_function: Callable[..., np.ndarray] = sigmoid,
        output_activation: Callable[..., np.ndarray] = sigmoid,
        loss_function: Callable[..., float] = neg_log_likelihood,
        learning_rate: float = 1.,
        dtype: str = 'float32',
        seed: Union[int, None] = None,
    ) -> None:
        """Initialize the MLP.

        Args:
            hidden_layer_sizes: Number of neurons in each hidden layer
            input_shape: Shape of inputs (m x n), use `None` for unknown m
            output_shape: Shape of outputs (m x o), use `None` for unknown m
            activation_function: Activation function for all layers prior to
                                 output; must accept a `der` keyword
            output_activation: Activation function for output layer
            loss_function: Loss taking `y_hat`, `targets` and `der` keywords;
                           with `der=True` its result is used as the gradient
                           at the output layer's weighted sums
            learning_rate: learning rate
            dtype: Data type for floats (e.g. np.float32 vs np.float64)
            seed: Optional random seed for consistent outputs (for debugging)
        """
        self.dtype = dtype
        # NOTE: this seeds NumPy's *global* random state
        np.random.seed(seed=seed)
        layer_sizes = ([input_shape[1]] + list(hidden_layer_sizes) +
                       [output_shape[1]])

        # Uniform initialisation in +/- sqrt(2 / (fan_in + fan_out))
        self.weights = []  # type: List[np.ndarray]
        for layer_size, next_layer_size in zip(layer_sizes, layer_sizes[1:]):
            limit = (2 / (layer_size + next_layer_size)) ** 0.5
            self.weights.append(
                np.random.uniform(
                    size=(layer_size, next_layer_size),
                    low=-limit,
                    high=limit,
                ).astype(self.dtype)
            )

        # NaN-filled placeholders, overwritten by the first forward pass
        self.zs = [np.full((size, 1), np.nan, dtype=self.dtype)
                   for size in layer_sizes[1:]]
        self.outputs = [z.copy() for z in self.zs]
        self.biases = [np.zeros((1, layer_size), dtype=self.dtype)
                       for layer_size in layer_sizes[1:]]

        self.activation_function = activation_function
        self.learning_rate = learning_rate
        self.output_activation = output_activation
        self.loss_function = loss_function

    def _propagate(
        self, inputs: np.ndarray
    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Run `inputs` through every layer without touching instance state.

        Args:
            inputs: Array of input values, one sample per row
        Returns:
            Tuple ``(zs, outputs)`` holding each layer's weighted sums and
            activations, ordered from first to last layer
        """
        zs = []  # type: List[np.ndarray]
        outputs = []  # type: List[np.ndarray]
        last_layer = len(self.weights) - 1
        layer_input = inputs

        for layer_num, (weight, bias) in enumerate(
                zip(self.weights, self.biases)):
            z = np.dot(layer_input, weight) + bias
            # The final layer always uses the output activation, including
            # when the network has no hidden layers at all.
            if layer_num == last_layer:
                layer_input = self.output_activation(z)
            else:
                layer_input = self.activation_function(z)
            zs.append(z)
            outputs.append(layer_input)

        return zs, outputs

    def _forward(self, inputs: np.ndarray) -> None:
        """Perform the forward pass, caching results for backpropagation.

        Args:
            inputs: Array of input values
        """
        zs, outputs = self._propagate(inputs)
        # Update in place so existing references to these lists stay valid
        self.zs[:] = zs
        self.outputs[:] = outputs

    def _backprop(self, inputs: np.ndarray, targets: np.ndarray) -> None:
        """Calculate gradients and perform the backward pass.

        Relies on `self.zs` / `self.outputs` from the preceding `_forward`
        call, stores the current loss in `self.err` and updates the weights
        and biases in place by one gradient-descent step.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
        """
        y_hat = self.outputs[-1]

        self.err = self.loss_function(y_hat=y_hat, targets=targets)

        dws = []  # type: List[np.ndarray]
        dbs = []  # type: List[np.ndarray]
        # Gradient at the output layer's weighted sums
        dz = self.loss_function(y_hat=y_hat, targets=targets, der=True)

        # Walk from the last layer back to the first
        for layer_num in range(len(self.weights) - 1, -1, -1):
            if layer_num > 0:
                layer_input = self.outputs[layer_num - 1]
            else:
                layer_input = inputs

            dws.append(np.dot(layer_input.T, dz))
            dbs.append(np.sum(dz, axis=0, keepdims=True))

            if layer_num > 0:
                # Push the gradient through this layer's weights and the
                # previous layer's activation function
                dz = (np.dot(dz, self.weights[layer_num].T) *
                      self.activation_function(self.zs[layer_num - 1],
                                               der=True))

        dws.reverse()
        dbs.reverse()

        for idx, (dw, db) in enumerate(zip(dws, dbs)):
            self.weights[idx] -= self.learning_rate * dw
            self.biases[idx] -= self.learning_rate * db

    def learn(self, inputs: DataArray, targets: DataArray) -> None:
        """Perform a forward and backward pass, updating weights.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
        """
        inputs = np.array(inputs, dtype=self.dtype)
        targets = np.array(targets, dtype=self.dtype)
        self._forward(inputs=inputs)
        self._backprop(inputs=inputs, targets=targets)

    def predict(self, inputs: DataArray) -> np.ndarray:
        """Use existing weights to predict outputs for given inputs.

        Note: this method does *not* update weights, nor the cached `zs` and
        `outputs` of the last training pass.

        Args:
            inputs: Array of inputs for which to make predictions
        Returns:
            Array of predictions
        """
        inputs = np.array(inputs, dtype=self.dtype)
        _, outputs = self._propagate(inputs)
        return outputs[-1]

    def _numeric_gradient(self, param: np.ndarray, inputs: np.ndarray,
                          targets: np.ndarray, epsilon: float) -> np.ndarray:
        """Estimate the loss gradient for `param` by central differences.

        `param` must be one of the arrays in `self.weights` / `self.biases`;
        each element is perturbed in place and restored before moving on.

        Args:
            param: Weight or bias array to differentiate with respect to
            inputs: Array of input values
            targets: Array of true outputs
            epsilon: Size of the perturbation
        Returns:
            Array shaped like `param` holding the estimated gradients
        """
        grad = np.zeros(param.shape, dtype=np.float64)
        for index in np.ndindex(*param.shape):
            original = param[index]

            param[index] = original + epsilon
            cost_plus = self.loss_function(y_hat=self.predict(inputs),
                                           targets=targets)

            param[index] = original - epsilon
            cost_minus = self.loss_function(y_hat=self.predict(inputs),
                                            targets=targets)

            param[index] = original
            grad[index] = (cost_plus - cost_minus) / (2 * epsilon)
        return grad

    def validate(self, inputs: np.ndarray, targets: np.ndarray,
                 epsilon: float = 1e-7) -> bool:
        """Use gradient checking to validate backpropagation.

        This method uses a naive implementation of gradient checking to try to
        verify the analytic gradients: central-difference estimates are
        compared (with `np.allclose`) against the gradients recovered from one
        `learn` step. Weights and biases are restored afterwards, even if an
        error is raised; `err`, `zs` and `outputs` reflect that `learn` step.

        NOTE(review): with the default 'float32' dtype an `epsilon` of 1e-7 is
        close to the precision of the stored weights, so the estimates may be
        noisy; a 'float64' network is likely a better fit for this check.

        Args:
            inputs: Array of input values
            targets: Array of true outputs
            epsilon: Small value by which to perturb values for gradient
                     checking
        Returns:
            Boolean reflecting whether or not the gradients seem to match
        """
        targets_arr = np.array(targets, dtype=self.dtype)

        backup_weights = [weight.copy() for weight in self.weights]
        backup_biases = [bias.copy() for bias in self.biases]

        try:
            weight_grads = [
                self._numeric_gradient(weight, inputs, targets_arr, epsilon)
                for weight in self.weights
            ]
            bias_grads = [
                self._numeric_gradient(bias, inputs, targets_arr, epsilon)
                for bias in self.biases
            ]

            # One learning step moves each parameter by
            # `learning_rate * gradient`, so the analytic gradient can be
            # recovered from the change in the parameters.
            self.learn(inputs=inputs, targets=targets)

            weight_deltas = [
                (before - after) / self.learning_rate
                for before, after in zip(backup_weights, self.weights)
            ]
            bias_deltas = [
                (before - after) / self.learning_rate
                for before, after in zip(backup_biases, self.biases)
            ]
        finally:
            self.weights = [backup_weight.copy()
                            for backup_weight in backup_weights]
            self.biases = [backup_bias.copy()
                           for backup_bias in backup_biases]

        checks = (
            ("weight", weight_grads, weight_deltas),
            ("bias", bias_grads, bias_deltas),
        )
        row = "{:^{width}}{:^{width}}{:^{width}}"
        width = 25

        for kind, numeric_grads, analytic_grads in checks:
            for idx, (calculated, analytic) in enumerate(
                    zip(numeric_grads, analytic_grads)):
                if np.allclose(calculated, analytic):
                    continue

                print("Wrong {} gradient suspected around layer {}."
                      .format(kind, idx))
                print(row.format("calculated", "analytic", "diff",
                                 width=width))
                for c, a in zip(np.ravel(calculated), np.ravel(analytic)):
                    c, a = float(c), float(a)
                    print(row.format(c, a, c - a, width=width))

                return False

            print("All {} gradients check out.".format(kind))

        return True

    def export_model(self, filename: str) -> None:
        """Export the learned biases and weights to a file.

        Saves each weight and bias in order with an index and a prefix of `W`
        or `b` to ensure it can be restored in the proper order. The file is
        written with `np.savez`, which appends ``.npz`` to `filename` when it
        is a string without that extension.

        Args:
            filename: Filename for the saved file.
        """
        # Zero-pad the index so a lexicographic sort keeps the layer order
        pad = len(str(len(self.weights)))
        arrays = {}
        for idx, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            arrays["W{:0{pad}}".format(idx, pad=pad)] = weight
            arrays["b{:0{pad}}".format(idx, pad=pad)] = bias
        np.savez(filename, **arrays)

    def import_model(self, filename: str) -> None:
        """Import learned biases and weights from a file.

        If `filename` does not exist and lacks the ``.npz`` extension, the
        name with ``.npz`` appended is tried as well, so the name passed to
        `export_model` can be reused here unchanged.

        Args:
            filename: Name of file from which to import
        Raises:
            FileNotFoundError: If no matching file exists
        """
        try:
            model = np.load(filename)
        except FileNotFoundError:
            if str(filename).endswith(".npz"):
                raise
            model = np.load(str(filename) + ".npz")

        # Close the underlying archive once the arrays have been read
        with model:
            keys = sorted(model.keys())
            self.weights = [model[k] for k in keys if k.startswith("W")]
            self.biases = [model[k] for k in keys if k.startswith("b")]