SrcSharedTensor() : src(nullptr), off(0) {}
SrcSharedTensor(const Tensor *tensor, size_t offset) :
- src(tensor),
- off(offset) {}
+ src(tensor), off(offset) {}
/**
* @brief Get the allocated src tensor
}
}
-// int Tensor::apply_i(std::function<float(float)> f) {
-// Tensor result = *this;
-// apply(f, result);
-
-// return ML_ERROR_NONE;
-// }
-
-// Tensor Tensor::apply(std::function<float(float)> f) const {
-// Tensor result;
-// return apply(f, result);
-// }
-
-// Tensor &Tensor::apply(std::function<float(float)> f, Tensor &output) const {
-// CREATE_IF_EMPTY_DIMS(output, dim);
-
-// if (dim != output.dim) {
-// /// @todo add unittest
-// throw std::invalid_argument(
-// "[Tensor::apply] output dimension does not match");
-// }
-
-// if (contiguous && output.contiguous) {
-// const float *data = getData();
-// float *rdata = output.getData();
-// std::transform(data, data + size(), rdata, f);
-// } else if (strides[3] == 1 && output.strides[3] == 1) {
-// /** @todo optimize this with combining these loops where stride is 1 */
-// for (unsigned int b = 0; b < batch(); ++b) {
-// for (unsigned int c = 0; c < channel(); ++c) {
-// for (unsigned int h = 0; h < height(); ++h) {
-// float *out_data = output.getAddress(b, c, h, 0);
-// const float *in_data = getAddress(b, c, h, 0);
-// std::transform(in_data, in_data + width(), out_data, f);
-// }
-// }
-// }
-// } else {
-// for (unsigned int b = 0; b < batch(); ++b) {
-// for (unsigned int c = 0; c < channel(); ++c) {
-// for (unsigned int h = 0; h < height(); ++h) {
-// for (unsigned int w = 0; w < width(); ++w) {
-// output.setValue(b, c, h, w, f(getValue(b, c, h, w)));
-// }
-// }
-// }
-// }
-// }
-
-// return output;
-// }
-
Tensor Tensor::apply(std::function<Tensor(Tensor)> f) const { return f(*this); }
Tensor &Tensor::apply(std::function<Tensor &(Tensor, Tensor &)> f,
                      Tensor &output) const {
  return f(*this, output);
}
}
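For reference, a minimal usage sketch of the Tensor-level overload kept above; `square`, `example`, and `input` are illustrative names rather than nntrainer API, and `Tensor::multiply` is assumed to return a new tensor as it does elsewhere in the library.

// Any callable matching Tensor(Tensor) works; the overload above simply
// forwards *this to it.
Tensor square(Tensor t) { return t.multiply(t); }

void example(const Tensor &input) {
  Tensor out = input.apply(std::function<Tensor(Tensor)>(square));
  // Equivalent to: Tensor out = square(input);
}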
- // if (dim.getDataType() == Tdatatype::FP32) {
- // if (contiguous && output.contiguous) {
- // const float *data = (getData<float>());
- // float *rdata = (output.getData<float>());
-
- // std::transform(data, data + size(), rdata, f);
- // } else if (strides[3] == 1 && output.strides[3] == 1) {
- // /** @todo optimize this with combining these loops where stride is 1
- // */ for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // float *out_data = output.getAddress<float>(b, c, h, 0);
- // const float *in_data = getAddress<float>(b, c, h, 0);
- // std::transform(in_data, in_data + width(), out_data, f);
- // }
- // }
- // }
- // } else {
- // for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // for (unsigned int w = 0; w < width(); ++w) {
- // output.setValue(b, c, h, w, f(getValue<float>(b, c, h, w)));
- // }
- // }
- // }
- // }
- // }
- // } else if (dim.getDataType() == Tdatatype::FP16) {
-
- // auto f_16 = [f](_FP16 x) -> _FP16 {
- // return static_cast<_FP16>(f(static_cast<float>(x)));
- // };
-
- // // std::function<_FP16(_FP16)> f_16 =
- // // static_cast<std::function<_FP16(_FP16)>>(f);
-
- // if (contiguous && output.contiguous) {
- // const _FP16 *data = (getData<_FP16>());
- // _FP16 *rdata = (output.getData<_FP16>());
-
- // std::transform(data, data + size(), rdata, f_16);
- // } else if (strides[3] == 1 && output.strides[3] == 1) {
- // /** @todo optimize this with combining these loops where stride is 1
- // */ for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // _FP16 *out_data = output.getAddress<_FP16>(b, c, h, 0);
- // const _FP16 *in_data = getAddress<_FP16>(b, c, h, 0);
- // std::transform(in_data, in_data + width(), out_data, f_16);
- // }
- // }
- // }
- // } else {
- // for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // for (unsigned int w = 0; w < width(); ++w) {
- // output.setValue(b, c, h, w, f_16(getValue<_FP16>(b, c, h,
- // w)));
- // }
- // }
- // }
- // }
- // }
- // }
return output;
};
return result;
};
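The FP16 branch of the commented-out block above adapts a caller-supplied float functor to half precision before dispatching; a compact sketch of that adapter, assuming `_FP16` is the half-precision alias used elsewhere in this file and `make_fp16_functor` is an illustrative name:

#include <functional>

// Widen each half value to float, apply f, then narrow the result back,
// mirroring the f_16 lambda in the removed block.
inline auto make_fp16_functor(std::function<float(float)> f) {
  return [f](_FP16 x) -> _FP16 {
    return static_cast<_FP16>(f(static_cast<float>(x)));
  };
}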
- // /**
- // * @brief Apply instantly to the element
- // *
- // * @param f function to apply
- // * @return int ML_ERROR_NONE if successful
- // */
- // int apply_i(std::function<_FP16(_FP16)> f) {
- // Tensor result = *this;
- // apply(f, result);
-
- // return ML_ERROR_NONE;
- // };
-
- // /**
- // * @brief Apply function element by element
- // * @param[in] *function function pointer applied
- // * @retval Tensor
- // */
- // Tensor apply(std::function<_FP16(_FP16)> f) const {
- // Tensor result;
- // return apply(f, result);
- // };
-
- // /**
- // * @brief Apply function element by element
- // * @param[in] *function function pointer applied
- // * @retval Tensor
- // */
- // Tensor apply(std::function<float(float)> f) const {
- // Tensor result;
- // return apply(f, result);
- // };
-
- // /**
- // * @brief Apply function element by element
- // * @param[in] *function function pointer applied
- // * @param[out] output output tensor
- // * @retval Tensor
- // */
- // Tensor &apply(std::function<_FP16(_FP16)> f, Tensor &output) const {
- // CREATE_IF_EMPTY_DIMS(output, dim, nullptr);
-
- // if (dim != output.dim) {
- // /// @todo add unittest
- // throw std::invalid_argument(
- // "[Tensor::apply] output dimension does not match");
- // }
-
- // #ifdef ENABLE_FP16
- // if (contiguous && output.contiguous) {
- // const _FP16 *data = (getData<_FP16>());
- // _FP16 *rdata = (output.getData<_FP16>());
-
- // std::transform(data, data + size(), rdata, f);
- // } else if (strides[3] == 1 && output.strides[3] == 1) {
- // /** @todo optimize this with combining these loops where stride is 1
- // */ for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // _FP16 *out_data = (_FP16 *)output.getAddress(b, c, h, 0);
- // const _FP16 *in_data = (_FP16 *)getAddress(b, c, h, 0);
- // std::transform(in_data, in_data + width(), out_data, f);
- // }
- // }
- // }
- // } else {
- // for (unsigned int b = 0; b < batch(); ++b) {
- // for (unsigned int c = 0; c < channel(); ++c) {
- // for (unsigned int h = 0; h < height(); ++h) {
- // for (unsigned int w = 0; w < width(); ++w) {
- // output.setValue(b, c, h, w,
- // f((_FP16)((_FP16)getValue(b, c, h, w))));
- // }
- // }
- // }
- // }
- // }
- // #else
- // throw std::invalid_argument("Error: enable-fp16 is not enabled");
- // #endif
-
- // return output;
- // };
-
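The removed _FP16 overload also gated its body on ENABLE_FP16 and threw otherwise; a minimal sketch of that guard, with `require_fp16_enabled` as an illustrative helper name:

#include <stdexcept>

// Fail loudly when a half-precision path is reached in a build without
// FP16 support, mirroring the #else branch of the removed overload.
inline void require_fp16_enabled() {
#ifndef ENABLE_FP16
  throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}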
/**
* @brief Apply function to Tensor
* @param[in] *function function pointer applied