module Num::NN

Extended Modules

Num::NN

Defined in:

nn/backends/cpu/activation.cr
nn/backends/cpu/convolution.cr
nn/backends/cpu/dropout.cr
nn/backends/cpu/loss.cr
nn/backends/cpu/maxpool.cr
nn/backends/cpu/optimizers.cr
nn/backends/opencl/activation.cr
nn/backends/opencl/dropout.cr
nn/backends/opencl/loss.cr
nn/backends/opencl/optimizers.cr
nn/datasets/datasets.cr
nn/datasets/iris.cr
nn/datasets/mnist.cr
nn/initialization.cr
nn/primitives/layer.cr
nn/primitives/validation.cr

Instance Method Detail

def compute_fans(*shape : Int) #

[View source]
def conv2d(input : Tensor(Float32, CPU(Float32)), weight : Tensor(Float32, CPU(Float32)), bias : Tensor(Float32, CPU(Float32)), padding : Tuple(Int, Int), stride : Tuple(Int, Int) = {1, 1}) #

Computes a 2D convolution over input images. Intended to be used in the 2D convolution forward pass. This applies a 2D cross-correlation, not to be confused with the mathematical convolution.

Arguments

  • input : Tensor - 4D Tensor batch of images of the size [N,C_in,H_in,W_in]
  • weight : Tensor - 4D Tensor convolving kernel weights of the size [C_out,C_in,kH,kW]
  • bias : Tensor - 3D Tensor bias of the size [C_out,1,1]
  • padding : Tuple - Tuple with height and width of the padding
  • stride : Tuple - Tuple with height and width of the stride
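
For illustration, a minimal shape-level sketch (the zeros/ones constructors and the chosen shapes are assumptions for this example, not part of the documented API):

input  = Tensor(Float32, CPU(Float32)).zeros([1, 3, 5, 5]) # one 3-channel 5x5 image
weight = Tensor(Float32, CPU(Float32)).ones([4, 3, 3, 3])  # four 3x3 kernels
bias   = Tensor(Float32, CPU(Float32)).zeros([4, 1, 1])
output = Num::NN.conv2d(input, weight, bias, {0, 0}, {1, 1})
output.shape # => [1, 4, 3, 3]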

[View source]
def conv2d_backward(input : Tensor(Float32, CPU(Float32)), weight : Tensor(Float32, CPU(Float32)), bias : Tensor(Float32, CPU(Float32)), grad_output : Tensor(Float32, CPU(Float32)), padding : Tuple(Int, Int), stride : Tuple(Int, Int) = {1, 1}) #

Computes gradients of a 2D convolution. Intended to be used after #conv2d to calculate gradients in the backward pass.

Arguments

  • input : Tensor - 4D Tensor batch of images of the size [N,C_in,H_in,W_in]
  • weight : Tensor - 4D Tensor convolving kernel weights of the size [C_out,C_in,kH,kW]
  • bias : Tensor - 3D Tensor bias of the size [C_out,1,1]
  • grad_output : Tensor - 4D Tensor gradient of size [N, C_out, H_out, W_out]
  • padding : Tuple - Tuple with height and width of the padding
  • stride : Tuple - Tuple with height and width of the stride
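
Continuing the sketch from #conv2d above, grad_output has the shape of the forward output; the return value is presumably the gradients with respect to input, weight, and bias (not stated here, but matching #im2colgemm_conv2d_gradient below):

grad_output = Tensor(Float32, CPU(Float32)).ones([1, 4, 3, 3]) # ones constructor assumed
Num::NN.conv2d_backward(input, weight, bias, grad_output, {0, 0}, {1, 1})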

[View source]
def dropout(input : Tensor(U, CPU(U)), mask : Tensor(U, CPU(U)), probability : Float) : Tensor(U, CPU(U)) forall U #

Computes a forward dropout activation

Arguments

  • input : Tensor - Tensor to activate
  • mask : Tensor - Mask to dropout
  • probability : Float - Probability of dropout
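
A call sketch only; how mask is interpreted (random draws compared against probability, or a precomputed 0/1 mask) is an assumption left open here:

x    = [0.5, 1.0, 1.5, 2.0].to_tensor
mask = [0.1, 0.9, 0.3, 0.7].to_tensor # hypothetical mask values
y    = Num::NN.dropout(x, mask, 0.5)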

[View source]
def dropout(input : Tensor(U, OCL(U)), mask : Tensor(U, OCL(U)), probability : Float) : Tensor(U, OCL(U)) forall U #

Computes a forward dropout activation

Arguments

  • input : Tensor - Tensor to activate
  • mask : Tensor - Mask to dropout
  • probability : Float - Probability of dropout

[View source]
def dropout_backwards(gradient : Tensor(U, CPU(U)), mask : Tensor(U, CPU(U)), probability : Float) : Tensor(U, CPU(U)) forall U #

Computes a backwards dropout derivative

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • mask : Tensor - Mask to apply to the gradient
  • probability : Float - Probability of dropout

[View source]
def elu(x : Tensor(U, CPU(U)), alpha = 0.01) : Tensor(U, CPU(U)) forall U #

Exponential linear unit activation

Arguments

  • x : Tensor - Argument to activate
  • alpha : Float - Scale applied to negative values

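For illustration, assuming the standard ELU formula alpha * (exp(x) - 1) for negative inputs (the values below follow from that assumption):

a = [-1.0, 0.0, 1.0].to_tensor
Num::NN.elu(a, 0.01) # => approximately [-0.00632, 0, 1]
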
[View source]
def elu!(x : Tensor(U, CPU(U)), alpha = 0.01) : Tensor(U, CPU(U)) forall U #

Exponential linear unit activation

Arguments

  • x : Tensor - Argument to activate
  • alpha : Float - Scale applied to negative values

[View source]
def elu_prime(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

ELU derivative

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

[View source]
def im2colgemm_conv2d(input : Tensor(U, CPU(U)), kernel : Tensor(U, CPU(U)), bias : Tensor(U, CPU(U)), padding : Tuple(Int, Int) = {0, 0}, stride : Tuple(Int, Int) = {1, 1}) : Tensor(U, CPU(U)) forall U #

Computes a 2D convolution over input images. Intended to be used in the 2D convolution forward pass. This applies a 2D cross-correlation, not to be confused with the mathematical convolution.

Arguments

  • input : Tensor - 4D Tensor batch of images of the size [N,C_in,H_in,W_in]
  • kernel : Tensor - 4D Tensor convolving kernel weights of the size [C_out,C_in,kH,kW]
  • bias : Tensor - 3D Tensor bias of the size [C_out,1,1]
  • padding : Tuple - Tuple with height and width of the padding
  • stride : Tuple - Tuple with height and width of the stride
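
The call mirrors #conv2d above but is generic over the element type and returns the convolved Tensor directly (zeros/ones constructors assumed for this sketch):

input  = Tensor(Float64, CPU(Float64)).zeros([1, 3, 5, 5])
kernel = Tensor(Float64, CPU(Float64)).ones([4, 3, 3, 3])
bias   = Tensor(Float64, CPU(Float64)).zeros([4, 1, 1])
Num::NN.im2colgemm_conv2d(input, kernel, bias).shape # => [1, 4, 3, 3]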

[View source]
def im2colgemm_conv2d_gradient(input : Tensor(U, CPU(U)), kernel : Tensor(U, CPU(U)), bias : Tensor(U, CPU(U)), grad_output : Tensor(U, CPU(U)), padding : Tuple(Int, Int) = {0, 0}, stride : Tuple(Int, Int) = {1, 1}) : Tuple(Tensor(U, CPU(U)), Tensor(U, CPU(U)), Tensor(U, CPU(U))) forall U #

Computes gradients of a 2D convolution. Intended to be used after #conv2d to calculate gradients in the backward pass.

Arguments

  • input : Tensor - 4D Tensor batch of images of the size [N,C_in,H_in,W_in]
  • kernel : Tensor - 4D Tensor convolving kernel weights of the size [C_out,C_in,kH,kW]
  • bias : Tensor - 3D Tensor bias of the size [C_out,1,1]
  • grad_output : Tensor - 4D Tensor gradient of size [N, C_out, H_out, W_out]
  • padding : Tuple - Tuple with height and width of the padding
  • stride : Tuple - Tuple with height and width of the stride
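
The return type is a tuple of three Tensors, presumably the gradients with respect to input, kernel, and bias, in that order (the ordering is an assumption). Continuing the sketch from #im2colgemm_conv2d above:

grad_output = Tensor(Float64, CPU(Float64)).ones([1, 4, 3, 3]) # ones constructor assumed
grad_input, grad_kernel, grad_bias = Num::NN.im2colgemm_conv2d_gradient(input, kernel, bias, grad_output)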

[View source]
def kaiming_normal(*shape : Int, dtype : Tensor(U, V).class) forall U, V #
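
No description is given in the source; from the name this presumably returns a Tensor of the given shape initialized with Kaiming (He) normal initialization. A call sketch matching the signature:

w = Num::NN.kaiming_normal(64, 128, dtype: Tensor(Float32, CPU(Float32)))
w.shape # => [64, 128]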

[View source]
def kaiming_uniform(*shape : Int, dtype : Tensor(U, V).class) forall U, V #

[View source]
def leaky_relu(x : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Leaky ReLU activation function

Arguments

  • x : Tensor - Argument to activate
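
A call sketch; positive entries pass through unchanged, while negative entries are scaled by a small slope whose value is an implementation detail not documented here:

a = [-2.0, 0.0, 2.0].to_tensor
Num::NN.leaky_relu(a) # negative entries scaled by the small leak factor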

[View source]
def leaky_relu(x : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

Leaky ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def leaky_relu!(x : Tensor(U, CPU(U))) forall U #

Leaky ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def leaky_relu!(x : Tensor(U, OCL(U))) forall U #

Leaky ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def leaky_relu_prime(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Leaky ReLU derivative

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

[View source]
def leaky_relu_prime(gradient : Tensor(U, OCL(U)), cached : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

Leaky ReLU derivative

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

[View source]
def load_iris_dataset #

Returns labels, as well as X and Y training inputs, for the Iris dataset.


[View source]
def load_mnist_dataset #

Returns a struct containing features and labels, as well as test_features and test_labels, for the MNIST dataset.
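
A usage sketch; the accessor names are taken from the description above, and the shapes are omitted because they are not documented here:

data = Num::NN.load_mnist_dataset
x_train, y_train = data.features, data.labels
x_test, y_test = data.test_features, data.test_labels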


[View source]
def maxpool(input : Tensor(U, CPU(U)), kernel : Tuple(Int, Int), padding = {0, 0}, stride = {0, 0}) : Tuple(Tensor(Int32, CPU(Int32)), Tensor(U, CPU(U))) forall U #

Computes the maxpooling of a Tensor

Arguments

  • input : Tensor - Tensor to pool
  • kernel : Tuple - Kernel height and width
  • padding : Tuple - Tuple with height and width of the padding
  • stride : Tuple - Tuple with height and width of the stride
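
A shape-level sketch (the zeros constructor is an assumption); with a 2x2 kernel and a stride of 2, a 4x4 input pools down to 2x2:

input = Tensor(Float32, CPU(Float32)).zeros([1, 1, 4, 4])
max_indices, pooled = Num::NN.maxpool(input, {2, 2}, {0, 0}, {2, 2})
pooled.shape # => [1, 1, 2, 2]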

[View source]
def maxpool_backward(shape : Array(Int), max_indices : Tensor(Int32, CPU(Int32)), grad_output : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Computes the maxpooling gradient

Arguments

  • shape : Array - Shape of the input to the forward #maxpool call (and of the returned gradient)
  • max_indices : Tensor - Max indices returned by the forward #maxpool call
  • grad_output : Tensor - Gradient with respect to the pooled output
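
Continuing the #maxpool sketch above, the gradient with respect to the pooled output is routed back to the positions recorded in max_indices:

grad_pooled = Tensor(Float32, CPU(Float32)).ones([1, 1, 2, 2]) # ones constructor assumed
grad_input  = Num::NN.maxpool_backward([1, 1, 4, 4], max_indices, grad_pooled)
grad_input.shape # => [1, 1, 4, 4]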

[View source]
def mean_relative_error(y : Tensor(U, CPU(U)), y_true : Tensor(U, CPU(U))) forall U #

Mean relative error for a Tensor: the mean of the element-wise |y_true - y| / max(|y_true|, |y|). The relative error is normally defined as |y_true - y| / |y_true|, but the max is used here to make the measure symmetric and to prevent division by zero; the result is guaranteed to be zero when both values are zero.
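
A worked sketch of the formula above (whether the result comes back as a plain number or a 0-dimensional Tensor is not documented here):

y      = [1.0, 2.0, 0.0].to_tensor
y_true = [1.0, 4.0, 0.0].to_tensor
Num::NN.mean_relative_error(y, y_true) # => 0.166667, the mean of [0, 0.5, 0]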


[View source]
def mse(input : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Mean squared error loss

Arguments

  • input : Tensor - Predicted values
  • target : Tensor - Truth values
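
A worked sketch, assuming the usual definition (the mean of the squared differences):

input  = [0.0, 1.0, 2.0].to_tensor
target = [0.0, 0.0, 0.0].to_tensor
Num::NN.mse(input, target) # mean of squared differences: (0 + 1 + 4) / 3 = 1.66667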

[View source]
def mse_backwards(gradient : Tensor(U, CPU(U)), cache : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) forall U #

Computes gradients of mean squared error loss

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cache : Tensor - Value cached from the forward pass
  • target : Tensor - Truth values

[View source]
def numerical_gradient(input : Tensor(U, CPU(U)), f : Proc(Tensor(U, CPU(U)), U), h : U = U.new(1e-5)) forall U #

Computes the numerical gradient of any function with respect to an input Tensor; useful for gradient checking. Float64 types are recommended to ensure numerical precision. The gradient is calculated as (f(x + h) - f(x - h)) / (2 * h), where h is a small number, typically 1e-5. f(x) is called for each input element with a +h and -h perturbation, iterating over all elements to compute each partial derivative.
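
A sketch for f(x) = sum(x^2), whose analytic gradient is 2x; the element-wise * and the .sum reduction used here are assumptions about the Tensor API:

x = [1.0, 2.0, 3.0].to_tensor
f = ->(t : Tensor(Float64, CPU(Float64))) { (t * t).sum }
Num::NN.numerical_gradient(x, f) # => approximately [2, 4, 6]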


[View source]
def numerical_gradient(input : Float, f : Proc(Float, Float), h : Float = 1e-5) : Float #

Computes the numerical gradient of any function with respect to an input value; useful for gradient checking. Float64 types are recommended to ensure numerical precision. The gradient is calculated as (f(x + h) - f(x - h)) / (2 * h), where h is a small number, typically 1e-5.
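
A sketch for f(x) = x^2 at x = 3, whose analytic derivative is 2x = 6:

f = ->(x : Float64) { x * x }
Num::NN.numerical_gradient(3.0, f) # => approximately 6.0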


[View source]
def relu(x : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

ReLU activation function

Arguments

  • x : Tensor - Argument to activate
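
For illustration:

a = [-1.0, 0.0, 2.0].to_tensor
Num::NN.relu(a) # negative entries clamp to zero => [0, 0, 2]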

[View source]
def relu(x : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def relu!(x : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def relu!(x : Tensor(U, OCL(U))) forall U #

ReLU activation function

Arguments

  • x : Tensor - Argument to activate

[View source]
def relu_prime(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Derivative of the ReLU activation function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

[View source]
def relu_prime(gradient : Tensor(U, OCL(U)), cached : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

Derivative of the ReLU activation function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

[View source]
def sgd_optimize(value : Tensor(U, CPU(U)), gradient : Tensor(U, CPU(U)), learning_rate : Float) forall U #
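
No description is given in the source; from the name and arguments this presumably applies a vanilla SGD step, value = value - learning_rate * gradient (whether it mutates value in place or returns a new Tensor is not documented). A call sketch with hypothetical names:

Num::NN.sgd_optimize(weights, weight_gradient, 0.01)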

[View source]
def sgd_optimize(value : Tensor(U, OCL(U)), gradient : Tensor(U, OCL(U)), learning_rate : Float) forall U #

[View source]
def sigmoid(x : Tensor(U, OCL(U))) forall U #

Sigmoid takes a real value as input and outputs another value between 0 and 1. It’s easy to work with and has all the nice properties of activation functions: it’s non-linear, continuously differentiable, monotonic, and has a fixed output range.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.sigmoid(a) # => [0.524979, 0.584191, 0.65701 ]

[View source]
def sigmoid(x) #

Sigmoid takes a real value as input and outputs another value between 0 and 1. It’s easy to work with and has all the nice properties of activation functions: it’s non-linear, continuously differentiable, monotonic, and has a fixed output range.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.sigmoid(a) # => [0.524979, 0.584191, 0.65701 ]

[View source]
def sigmoid!(x : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Sigmoid takes a real value as input and outputs another value between 0 and 1. It’s easy to work with and has all the nice properties of activation functions: it’s non-linear, continuously differentiable, monotonic, and has a fixed output range.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.sigmoid(a) # => [0.524979, 0.584191, 0.65701 ]

[View source]
def sigmoid!(x : Tensor(U, OCL(U))) forall U #

Sigmoid takes a real value as input and outputs another value between 0 and 1. It’s easy to work with and has all the nice properties of activation functions: it’s non-linear, continuously differentiable, monotonic, and has a fixed output range.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.sigmoid(a) # => [0.524979, 0.584191, 0.65701 ]

[View source]
def sigmoid_cross_entropy(input : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) forall U #

Sigmoid cross entropy loss

Arguments

  • input : Tensor - Predicted values
  • target : Tensor - Truth values
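
A call sketch; input is presumably expected to hold raw scores (logits) rather than probabilities, and whether the loss is summed or averaged is not documented here, so no value is shown:

input  = [0.1, 2.0, -1.0].to_tensor
target = [0.0, 1.0, 0.0].to_tensor
loss = Num::NN.sigmoid_cross_entropy(input, target)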

[View source]
def sigmoid_cross_entropy(input : Tensor(U, OCL(U)), target : Tensor(U, OCL(U))) forall U #

Sigmoid cross entropy loss

Arguments

  • input : Tensor - Predicted values
  • target : Tensor - Truth values

[View source]
def sigmoid_cross_entropy_backwards(gradient : Tensor(U, CPU(U)), cache : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) forall U #

Computes gradients of sigmoid cross entropy loss

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cache : Tensor - Value cached from the forward pass
  • target : Tensor - Truth values

[View source]
def sigmoid_cross_entropy_backwards(gradient : Tensor(U, OCL(U)), cache : Tensor(U, OCL(U)), target : Tensor(U, OCL(U))) forall U #

Computes gradients of sigmoid cross entropy loss

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cache : Tensor - Value cached from the forward pass
  • target : Tensor - Truth values

[View source]
def sigmoid_prime(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Derivative of the Sigmoid function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.d_sigmoid(a) # => [0.249376, 0.242912, 0.225348]

[View source]
def sigmoid_prime(gradient : Tensor(U, OCL(U)), cached : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

Derivative of the Sigmoid function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

Examples

a = [0.1, 0.34, 0.65].to_tensor
puts Num::NN.d_sigmoid(a) # => [0.249376, 0.242912, 0.225348]

[View source]
def softmax_cross_entropy(input : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) forall U #

Computes softmax cross entropy loss

Arguments

  • input : Tensor - Predicted values
  • target : Tensor - Truth values
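
A call sketch with one row of raw scores per sample and one-hot truth values (the expected encoding is an assumption; no value is shown because the reduction is not documented here):

input  = [[0.5, 1.5, 0.1], [2.2, 1.3, 1.7]].to_tensor
target = [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]].to_tensor
loss = Num::NN.softmax_cross_entropy(input, target)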

[View source]
def softmax_cross_entropy_backward(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U)), target : Tensor(U, CPU(U))) forall U #

Computes gradients of softmax cross entropy loss

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass
  • target : Tensor - Truth values

[View source]
def tanh(x : Tensor(U, CPU(U))) : Tensor(U, CPU(U)) forall U #

Tanh squashes a real-valued number to the range [-1, 1]. It’s non-linear. But unlike Sigmoid, its output is zero-centered. Therefore, in practice the tanh non-linearity is always preferred to the sigmoid nonlinearity.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.tanh(a) # => [0.099668, 0.327477, 0.57167 ]

[View source]
def tanh(x : Tensor(U, OCL(U))) : Tensor(U, OCL(U)) forall U #

Tanh squashes a real-valued number to the range [-1, 1]. It’s non-linear. But unlike Sigmoid, its output is zero-centered. Therefore, in practice the tanh non-linearity is always preferred to the sigmoid nonlinearity.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.tanh(a) # => [0.099668, 0.327477, 0.57167 ]

[View source]
def tanh!(x : Tensor(U, CPU(U))) forall U #

Tanh squashes a real-valued number to the range [-1, 1]. It’s non-linear. But unlike Sigmoid, its output is zero-centered. Therefore, in practice the tanh non-linearity is always preferred to the sigmoid nonlinearity.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.tanh(a) # => [0.099668, 0.327477, 0.57167 ]

[View source]
def tanh!(x : Tensor(U, OCL(U))) forall U #

Tanh squashes a real-valued number to the range [-1, 1]. It’s non-linear. But unlike Sigmoid, its output is zero-centered. Therefore, in practice the tanh non-linearity is always preferred to the sigmoid nonlinearity.

Arguments

  • x : Tensor - Argument to activate

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.tanh(a) # => [0.099668, 0.327477, 0.57167 ]

[View source]
def tanh_prime(gradient : Tensor(U, CPU(U)), cached : Tensor(U, CPU(U))) forall U #

Derivative of the Tanh function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.d_tanh(a) # => [0.990066, 0.892759, 0.673193]

[View source]
def tanh_prime(gradient : Tensor(U, OCL(U)), cached : Tensor(U, OCL(U))) forall U #

Derivative of the Tanh function

Arguments

  • gradient : Tensor - Tensor used to compute backwards pass
  • cached : Tensor - Value cached from the forward pass

Examples

a = [0.1, 0.34, 0.65].to_tensor
Num::NN.d_tanh(a) # => [0.990066, 0.892759, 0.673193]

[View source]
def variance_scaled(*shape : Int, dtype : U.class, device : V.class, scale : U = U.new(1), mode : FanMode = FanMode::FanIn, distribution : Distribution = Distribution::Normal) forall U, V #
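
No description is given in the source; the name and defaults suggest a variance-scaling initializer (scale adjusted by the fan computed from shape, drawn from a normal or uniform distribution). A call sketch using the defaults FanMode::FanIn and Distribution::Normal:

w = Num::NN.variance_scaled(128, 64, dtype: Float32, device: CPU(Float32))
w.shape # => [128, 64]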

[View source]