namespace caffe {
-/* NeuronLayer
- An interface for layers that take one blob as input (x),
- and produce one blob as output (y).
-*/
+/**
+ * @brief An interface for layers that take one blob as input (@f$ x @f$)
+ * and produce one equally-sized blob as output (@f$ y @f$), where
+ * each element of the output depends only on the corresponding input
+ * element.
+ */
template <typename Dtype>
class NeuronLayer : public Layer<Dtype> {
public:
virtual inline int ExactNumTopBlobs() const { return 1; }
};
-/* AbsVal Layer
- y = |x|
-
- y' = 1 if x > 0
- = -1 if x < 0
-*/
+/**
+ * @brief Computes @f$ y = |x| @f$
+ *
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$ y = |x| @f$
+ */
template <typename Dtype>
class AbsValLayer : public NeuronLayer<Dtype> {
public:
  explicit AbsValLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
+
protected:
+ /// @copydoc AbsValLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+
+ /**
+ * @brief Computes the error gradient w.r.t. the absolute value inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x} =
+ * \mathrm{sign}(x) \frac{\partial E}{\partial y}
+ * @f$ if propagate_down[0]
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
};
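+
+// An illustrative scalar sketch (not part of the layer API; names are
+// hypothetical) of the elementwise rules documented above:
+//
+//   double absval_forward(double x) { return std::fabs(x); }
+//   // top_diff is dE/dy; returns dE/dx = sign(x) * dE/dy.
+//   double absval_backward(double x, double top_diff) {
+//     return (x > 0 ? 1.0 : x < 0 ? -1.0 : 0.0) * top_diff;
+//   }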
-/* BNLLLayer
-
- y = x + log(1 + exp(-x)) if x > 0
- y = log(1 + exp(x)) if x <= 0
-
- y' = exp(x) / (exp(x) + 1)
-*/
+/**
+ * @brief Computes @f$ y = x + \log(1 + \exp(-x)) @f$ if @f$ x > 0 @f$;
+ * @f$ y = \log(1 + \exp(x)) @f$ otherwise.
+ *
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = \left\{
+ * \begin{array}{ll}
+ * x + \log(1 + \exp(-x)) & \mbox{if } x > 0 \\
+ * \log(1 + \exp(x)) & \mbox{otherwise}
+ * \end{array} \right.
+ * @f$
+ */
template <typename Dtype>
class BNLLLayer : public NeuronLayer<Dtype> {
public:
  explicit BNLLLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
protected:
+ /// @copydoc BNLLLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+
+ /**
+ * @brief Computes the error gradient w.r.t. the BNLL inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+   * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+   *        gradients @f$
+   *          \frac{\partial E}{\partial x} =
+   *              \frac{\partial E}{\partial y} \frac{\exp(x)}{\exp(x) + 1}
+   *        @f$ if propagate_down[0]
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
};
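+
+// An illustrative scalar sketch (names hypothetical) of the BNLL forward
+// rule, showing why it branches on the sign of x: both branches equal
+// log(1 + exp(x)), but keeping the std::exp argument non-positive avoids
+// overflow for large |x|.
+//
+//   double bnll_forward(double x) {
+//     return x > 0 ? x + std::log(1.0 + std::exp(-x))
+//                  : std::log(1.0 + std::exp(x));
+//   }
+//   // top_diff is dE/dy; dE/dx = dE/dy * exp(x) / (exp(x) + 1), computed
+//   // here in the equivalent overflow-safe form 1 / (1 + exp(-x)).
+//   double bnll_backward(double x, double top_diff) {
+//     return top_diff / (1.0 + std::exp(-x));
+//   }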
-/* DropoutLayer
- During training only, sets some portion of x to 0, adjusting the
- vector magnitude accordingly.
-
- mask = bernoulli(1 - threshold)
- scale = 1 / (1 - threshold)
- y = x * mask * scale
-
- y' = mask * scale
-*/
+/**
+ * @brief During training only, sets a random portion of @f$x@f$ to 0, adjusting
+ * the rest of the vector magnitude accordingly.
+ *
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ *      the computed outputs @f$ y @f$ (see Forward_cpu for the dropout rule)
+ */
template <typename Dtype>
class DropoutLayer : public NeuronLayer<Dtype> {
public:
+ /**
+ * @param param provides DropoutParameter dropout_param,
+ * with DropoutLayer options:
+ * - dropout_ratio (\b optional, default 0.5).
+ * Sets the probability @f$ p @f$ that any given unit is dropped.
+ */
explicit DropoutLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs. At training time, we have @f$
+ * y_{\mbox{train}} = \left\{
+ * \begin{array}{ll}
+ * \frac{x}{1 - p} & \mbox{if } u > p \\
+ * 0 & \mbox{otherwise}
+ * \end{array} \right.
+ * @f$, where @f$ u \sim U(0, 1)@f$ is generated independently for each
+ * input at each iteration. At test time, we simply have
+ * @f$ y_{\mbox{test}} = \mathbb{E}[y_{\mbox{train}}] = x @f$.
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+  /// the random values, which give @f$ u \sim U(0, 1) @f$ when divided by UINT_MAX
Blob<unsigned int> rand_vec_;
+ /// the probability @f$ p @f$ of dropping any input
Dtype threshold_;
+ /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
Dtype scale_;
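+  /// the threshold scaled to the unsigned int range used by rand_vec_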
unsigned int uint_thres_;
};
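+
+// An illustrative scalar sketch (names hypothetical) of the train/test
+// behavior documented on Forward_cpu, with u drawn uniformly from [0, 1).
+// Scaling kept units by 1 / (1 - p) makes E[y_train] = x, so the layer can
+// simply pass inputs through at test time.
+//
+//   double dropout_forward(double x, double u, double p, bool train_phase) {
+//     if (!train_phase) { return x; }      // y_test = E[y_train] = x
+//     return u > p ? x / (1.0 - p) : 0.0;  // drop with probability p
+//   }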
-/* PowerLayer
- y = (shift + scale * x) ^ power
-
- y' = scale * power * (shift + scale * x) ^ (power - 1)
- = scale * power * y / (shift + scale * x)
-*/
+/**
+ * @brief Computes @f$ y = (\alpha x + \beta) ^ \gamma @f$,
+ * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$,
+ * and power @f$ \gamma @f$.
+ */
template <typename Dtype>
class PowerLayer : public NeuronLayer<Dtype> {
public:
+ /**
+ * @param param provides PowerParameter power_param,
+ * with PowerLayer options:
+ * - scale (\b optional, default 1) the scale @f$ \alpha @f$
+ * - shift (\b optional, default 0) the shift @f$ \beta @f$
+ * - power (\b optional, default 1) the power @f$ \gamma @f$
+ */
explicit PowerLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = (\alpha x + \beta) ^ \gamma
+ * @f$
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+
+ /**
+ * @brief Computes the error gradient w.r.t. the power inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x} =
+ * \frac{\partial E}{\partial y}
+ * \alpha \gamma (\alpha x + \beta) ^ {\gamma - 1} =
+ * \frac{\partial E}{\partial y}
+ * \frac{\alpha \gamma y}{\alpha x + \beta}
+ * @f$ if propagate_down[0]
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+ /// @brief @f$ \gamma @f$ from layer_param_.power_param()
Dtype power_;
+ /// @brief @f$ \alpha @f$ from layer_param_.power_param()
Dtype scale_;
+ /// @brief @f$ \beta @f$ from layer_param_.power_param()
Dtype shift_;
+ /// @brief Result of @f$ \alpha \gamma @f$
Dtype diff_scale_;
};
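+
+// An illustrative scalar sketch (names hypothetical) of the two equivalent
+// gradient forms documented on Backward_cpu; the second form reuses the
+// forward output y to avoid recomputing the power (and assumes
+// alpha * x + beta != 0).
+//
+//   double power_forward(double alpha, double beta, double gamma, double x) {
+//     return std::pow(alpha * x + beta, gamma);
+//   }
+//   double power_backward(double alpha, double beta, double gamma,
+//                         double x, double y, double top_diff) {
+//     // dE/dx = dE/dy * alpha * gamma * (alpha x + beta)^(gamma - 1)
+//     //       = dE/dy * alpha * gamma * y / (alpha x + beta)
+//     return top_diff * alpha * gamma * y / (alpha * x + beta);
+//   }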
-/* ReLULayer
- Rectified Linear Unit non-linearity.
- The simple max is fast to compute, and the function does not saturate.
-
- y = max(0, x).
-
- y' = 0 if x < 0
- y' = 1 if x > 0
-*/
+/**
+ * @brief Rectified Linear Unit non-linearity @f$ y = \max(0, x) @f$.
+ * The simple max is fast to compute, and the function does not saturate.
+ */
template <typename Dtype>
class ReLULayer : public NeuronLayer<Dtype> {
public:
+ /**
+ * @param param provides ReLUParameter relu_param,
+ * with ReLULayer options:
+ * - negative_slope (\b optional, default 0).
+ * The value @f$ \nu @f$ by which negative values are multiplied.
+ */
explicit ReLULayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = \max(0, x)
+ * @f$ by default. If a non-zero negative_slope @f$ \nu @f$ is provided,
+ * the computed outputs are @f$ y = \max(0, x) + \nu \min(0, x) @f$.
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+ /**
+ * @brief Computes the error gradient w.r.t. the ReLU inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x} = \left\{
+ * \begin{array}{lr}
+ * 0 & \mathrm{if} \; x \le 0 \\
+ * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0
+ * \end{array} \right.
+ * @f$ if propagate_down[0], by default.
+ * If a non-zero negative_slope @f$ \nu @f$ is provided,
+ * the computed gradients are @f$
+ * \frac{\partial E}{\partial x} = \left\{
+ * \begin{array}{lr}
+ * \nu \frac{\partial E}{\partial y} & \mathrm{if} \; x \le 0 \\
+ * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0
+ * \end{array} \right.
+ * @f$.
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
};
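+
+// An illustrative scalar sketch (names hypothetical) of the forward and
+// backward rules with the optional negative_slope nu: nu = 0 gives the
+// standard ReLU, while nu > 0 gives a "leaky" ReLU whose gradient does not
+// vanish for x <= 0.
+//
+//   double relu_forward(double x, double nu) {
+//     return std::max(0.0, x) + nu * std::min(0.0, x);
+//   }
+//   double relu_backward(double x, double nu, double top_diff) {
+//     return (x > 0 ? 1.0 : nu) * top_diff;
+//   }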
-/* SigmoidLayer
- Sigmoid function non-linearity, a classic choice in neural networks.
- Note that the gradient vanishes as the values move away from 0.
- The ReLULayer is often a better choice for this reason.
-
- y = 1. / (1 + exp(-x))
-
- y ' = exp(x) / (1 + exp(x))^2
- or
- y' = y * (1 - y)
-*/
+/**
+ * @brief Sigmoid function non-linearity @f$
+ * y = (1 + \exp(-x))^{-1}
+ * @f$, a classic choice in neural networks.
+ *
+ * Note that the gradient vanishes as the values move away from 0.
+ * The ReLULayer is often a better choice for this reason.
+ */
template <typename Dtype>
class SigmoidLayer : public NeuronLayer<Dtype> {
public:
  explicit SigmoidLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = (1 + \exp(-x))^{-1}
+ * @f$
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+
+ /**
+ * @brief Computes the error gradient w.r.t. the sigmoid inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x}
+ * = \frac{\partial E}{\partial y} y (1 - y)
+ * @f$ if propagate_down[0]
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
};
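+
+// An illustrative scalar sketch (names hypothetical): the sigmoid gradient
+// can be computed from the forward output alone, since dy/dx = y * (1 - y).
+//
+//   double sigmoid_forward(double x) { return 1.0 / (1.0 + std::exp(-x)); }
+//   double sigmoid_backward(double y, double top_diff) {
+//     return top_diff * y * (1.0 - y);  // dE/dx = dE/dy * y * (1 - y)
+//   }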
-/* TanHLayer
- Hyperbolic tangent non-linearity, popular in auto-encoders.
-
- y = 1. * (exp(2x) - 1) / (exp(2x) + 1)
-
- y' = 1 - ( (exp(2x) - 1) / (exp(2x) + 1) ) ^ 2
-*/
+/**
+ * @brief TanH hyperbolic tangent non-linearity @f$
+ * y = \frac{\exp(2x) - 1}{\exp(2x) + 1}
+ * @f$, popular in auto-encoders.
+ *
+ * Note that the gradient vanishes as the values move away from 0.
+ * The ReLULayer is often a better choice for this reason.
+ */
template <typename Dtype>
class TanHLayer : public NeuronLayer<Dtype> {
public:
  explicit TanHLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = \frac{\exp(2x) - 1}{\exp(2x) + 1}
+ * @f$
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+
+ /**
+   * @brief Computes the error gradient w.r.t. the TanH inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x}
+ * = \frac{\partial E}{\partial y}
+   *            \left(1 - \left[\frac{\exp(2x) - 1}{\exp(2x) + 1} \right]^2 \right)
+ * = \frac{\partial E}{\partial y} (1 - y^2)
+ * @f$ if propagate_down[0]
+ */
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
};
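+
+// An illustrative scalar sketch (names hypothetical): as with the sigmoid,
+// the TanH gradient follows from the output alone, since the bracketed term
+// in the Backward_cpu documentation above is just y.
+//
+//   double tanh_backward(double y, double top_diff) {
+//     return top_diff * (1.0 - y * y);  // dE/dx = dE/dy * (1 - y^2)
+//   }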
-/* ThresholdLayer
- Outputs 1 if value in input is above threshold, 0 otherwise.
- The defult threshold = 0, which means positive values would become 1 and
- negative or 0, would become 0
-
- y = 1 if x > threshold
- y = 0 if x <= threshold
-
- y' = don't differenciable
-*/
+/**
+ * @brief Tests whether the input exceeds a threshold: outputs 1 for inputs
+ * above threshold; 0 otherwise.
+ */
template <typename Dtype>
class ThresholdLayer : public NeuronLayer<Dtype> {
public:
+ /**
+ * @param param provides ThresholdParameter threshold_param,
+ * with ThresholdLayer options:
+ * - threshold (\b optional, default 0).
+ * The threshold value @f$ t @f$ to which the input values are compared.
+ */
explicit ThresholdLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = \left\{
+ * \begin{array}{lr}
+ * 0 & \mathrm{if} \; x \le t \\
+ * 1 & \mathrm{if} \; x > t
+ * \end{array} \right.
+ * @f$
+ */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
+ /// @brief Not implemented (non-differentiable function)
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
NOT_IMPLEMENTED;