* width direction
* @retval LazyTensor *this
*/
- LazyTensor &sum(int axis = 0);
+ LazyTensor &sum(int axis);
/**
* @brief Wrapper method of average. see tensor.h for more detail (memcopy
* width direction
* @retval LazyTensor *this
*/
- LazyTensor &average(int axis = 0);
+ LazyTensor &average(int axis);
+
+ /**
+ * @brief Wrapper method of average. see tensor.h for more detail (memcopy
+ * happens)
+ * @retval LazyTensor *this
+ */
+ LazyTensor &average();
/**
* @brief apply A tensor function when predicate is true
* 3 : width direction
* @retval Calculated Tensor
*/
- Tensor sum(int axis = 0) const;
+ Tensor sum(int axis) const;
/**
* @brief Averaging the Tensor elements according to the axis
* 3 : width direction
* @retval Calculated Tensor
*/
- Tensor average(int axis = 0) const;
+ Tensor average(int axis) const;
+
+ /**
+ * @brief Averaging the Tensor elements over all axes
+ * @retval Calculated Tensor
+ */
+ Tensor average() const;
/**
* @brief Anchor a starting point to defer following evaluation
void width(unsigned int w) { setTensorDim(3, w); }
const unsigned int *getDim() const { return dim; }
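+ // rank is fixed at MAXDIM, so callers such as average() can iterate every axis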
+ unsigned int getNumDim() const { return MAXDIM; }
void setTensorDim(unsigned int idx, unsigned int value);
int setTensorDim(std::string input_shape);
}
/**
+ * @brief Wrapper method of average. see tensor.h for more detail (memcopy
+ * happens)
+ * @retval LazyTensor *this
+ */
+LazyTensor &LazyTensor::average() {
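+ // queue a deferred functor; it executes only when run() walks call_chain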
+ auto f = [](Tensor &t) -> int {
+ try {
+ t = t.average();
+ return ML_ERROR_NONE;
+ } catch (std::runtime_error &e) {
+ return ML_ERROR_INVALID_PARAMETER;
+ }
+ };
+
+ call_chain.push_back(f);
+ return *this;
+}
+
+/**
* @brief execute the call_chain to evaluate
* @retval calculated tensor
*/
// y2 <- y2 - y;
y2.subtract_i(y);
- l = y2.chain().multiply_i(y2).sum_by_batch().multiply_i(0.5).run();
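+ // square the residual and take the mean over every element (plain MSE),
+ // replacing the former 0.5 * per-batch sum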
+ l = y2.chain().multiply_i(y2).average().run();
} break;
case COST_ENTROPY_SIGMOID: {
// @todo: change this to apply_i
.apply(static_cast<float (*)(float)>(&std::exp))
.add(1.0)
.apply(logFloat);
- mid_term = mid_term.add(mid_term.apply(relu));
-
- // loss = y * y2 - (log(1 + exp(-abs(y))) + max(y, 0))
- l = y2.chain()
- .multiply_i(y)
- .add_i(mid_term)
- .multiply_i(-1.0 / y2.getWidth())
- .run()
- .sum_by_batch();
+ mid_term = mid_term.add(y.apply(relu));
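+ // log(1 + exp(-|y|)) + max(y, 0) is the overflow-safe form of log(1 + exp(y))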
+
+ // y * y2
+ Tensor end_term = y2.chain().multiply_i(y).run();
+
+ // loss = log(1 + exp(-abs(y))) + max(y, 0) - (y * y2)
+ l = mid_term.subtract(end_term).average();
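+ // apply the activation only after the loss has consumed the raw logits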
+ y = y.apply(sigmoid);
} break;
case COST_ENTROPY_SOFTMAX: {
y = y.apply(softmax);
- l = y2.chain()
- .multiply_i(y.apply(logFloat))
- .multiply_i(-1.0 / y2.getWidth())
- .run()
- .sum_by_batch();
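+ // per-batch sum of y2 * log(y), where y already holds the softmax output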
+ l = y2.chain().multiply_i(y.apply(logFloat)).run().sum_by_batch();
} break;
case COST_ENTROPY: {
/// @note: that current implementation does not update grad since updating
/// grad changes it's dimension
Tensor x_grad = param.grad;
- x_grad = x_grad.average();
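+ // axis 0 keeps the old behavior: average the gradient across the batch only,
+ // now that the no-arg average() reduces over every axis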
+ x_grad = x_grad.average(0);
switch (type) {
case OptType::sgd:
x.add_i(x_grad, -ll);
return result;
}
+/**
+ * @brief Calculate the average value over all axes
+ */
+Tensor Tensor::average() const {
+ LazyTensor lazy_result = this->chain();
+
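+ // averaging along each axis divides by that axis' length; the divisors
+ // multiply to the total element count, so the result is the overall mean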
+ for (unsigned int axis = 0; axis < dim.getNumDim(); ++axis)
+ lazy_result.average(axis);
+
+ return lazy_result.run();
+}
+
void Tensor::setValue(float val) {
float *data = getData();
std::fill(data, data + length(), val);