#include <stdio.h>
#include <lazy_tensor.h>
-#include <nntrainer_log.h>
#include <tensor.h>
#include <util_func.h>
int buffer_axis; /**< the smallest axis that should be looped.
-1 means no loop needed*/
std::array<unsigned int, TensorDim::MAXDIM>
- strides; /**< modified strides for the loop */
+ strides; /**< modified strides for the loop */
nntrainer::TensorDim::TensorType tensor_type;
};
/**
 * @brief Default constructor: src is set to nullptr and off is set to 0
 */
SrcSharedTensor() : src(nullptr), off(0) {}
/**
 * @brief Construct from a source tensor pointer and an offset
 * @param[in] tensor pointer stored in src
 * @param[in] offset value stored in off
 */
SrcSharedTensor(const Tensor *tensor, size_t offset) :
- src(tensor), off(offset) {}
+ src(tensor),
+ off(offset) {}
/**
* @brief Get the allocated src tensor
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
mem_data = new MemoryData((void *)(new float[dim.getDataLen()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
- delete[] (float *)mem_data->getAddr();
+ delete[](float *) mem_data->getAddr();
delete mem_data;
});
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
mem_data = new MemoryData((void *)(new __fp16[dim.getDataLen()]()));
data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
- delete[] (__fp16 *)mem_data->getAddr();
+ delete[](__fp16 *) mem_data->getAddr();
delete mem_data;
});
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
offset = 0;
initialize();
return false;
}
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *_data = getData<__fp16>();
const __fp16 *_rdata = rhs.getData<__fp16>();
for (size_t i = 0; i < len; ++i) {
std::fabs(_data[i] - _rdata[i]) > epsilon)
return false;
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return true;
setDist<float, std::bernoulli_distribution>(
std::bernoulli_distribution(probability));
} else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
setDist<__fp16, std::bernoulli_distribution>(
std::bernoulli_distribution((__fp16)probability));
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
NNTR_THROW_IF(output.getData<float>() == nullptr, std::invalid_argument)
<< output.getName() << " is not allocated";
} else if (getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
NNTR_THROW_IF(getData<__fp16>() == nullptr, std::invalid_argument)
<< getName() << " is not allocated";
NNTR_THROW_IF(m.getData<__fp16>() == nullptr, std::invalid_argument)
<< m.getName() << " is not allocated";
NNTR_THROW_IF(output.getData<__fp16>() == nullptr, std::invalid_argument)
<< output.getName() << " is not allocated";
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
// Format NCHW Case
}
}
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 ||
beta != 0.0) {
for (unsigned int b = 0; b < batch(); ++b) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
} else { // Format NHWC Case
if (getDataType() == Tdatatype::FP32) {
}
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 ||
beta != 0.0) {
for (unsigned int b = 0; b < batch(); ++b) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
NNTR_THROW_IF(output.getData<float>() == nullptr, std::invalid_argument)
<< output.getName() << " is not allocated";
} else if (getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
NNTR_THROW_IF(getData<__fp16>() == nullptr, std::invalid_argument)
<< getName() << " is not allocated";
NNTR_THROW_IF(m.getData<__fp16>() == nullptr, std::invalid_argument)
<< m.getName() << " is not allocated";
NNTR_THROW_IF(output.getData<__fp16>() == nullptr, std::invalid_argument)
<< output.getName() << " is not allocated";
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
// Format NCHW Case
}
}
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 ||
beta != 0.0) {
for (unsigned int b = 0; b < batch(); ++b) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
} else { // Format NHWC Case
if (getDataType() == Tdatatype::FP32) {
}
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 ||
beta != 0.0) {
for (unsigned int b = 0; b < batch(); ++b) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
return output;
sscal(len, value, data, 1);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
__fp16 *data = getData<__fp16>();
unsigned int len = size();
sscal(len, value, data, 1);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return ML_ERROR_NONE;
}
auto f = std::bind(std::multiplies<float>(), std::placeholders::_1, value);
return apply(f, out);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = std::bind(std::multiplies<__fp16>(), std::placeholders::_1, value);
return apply(f, out);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return out;
}
return output;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = [&](const BroadcastInfo &e, const __fp16 *buf, const __fp16 *m_buf,
__fp16 *out_buf) {
if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1 &&
apply_broadcast(m, f, output);
return output;
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return output;
}
apply_broadcast(m, f, output);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = [&](const BroadcastInfo &e, const __fp16 *buf, const __fp16 *m_buf,
__fp16 *out_buf) {
if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1) {
<< getName() << " is not contiguous, cannot divide";
apply_broadcast(m, f, output);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return output;
}
auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
return apply(f, out);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = std::bind(std::plus<__fp16>(), std::placeholders::_1, value);
return apply(f, out);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return out;
}
return ML_ERROR_INVALID_PARAMETER;
}
- return ML_ERROR_NONE;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = [&](const BroadcastInfo &e, const __fp16 *buf, const __fp16 *m_buf,
__fp16 *out_buf) {
saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
return ML_ERROR_INVALID_PARAMETER;
}
- return ML_ERROR_NONE;
+#else
+ ml_loge("%s", "Error: enable-fp16 is not enabled");
+ return ML_ERROR_INVALID_PARAMETER;
+#endif
}
+ return ML_ERROR_NONE;
}
Tensor Tensor::add(Tensor const &m, float const alpha) const {
};
apply_broadcast(m, f, output);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = [&](const BroadcastInfo &e, const __fp16 *buf, const __fp16 *m_buf,
__fp16 *out_buf) {
if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 &&
}
};
apply_broadcast(m, f, output);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return output;
}
auto f = std::bind(std::minus<float>(), std::placeholders::_1, value);
return apply(f, out);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = std::bind(std::minus<__fp16>(), std::placeholders::_1, value);
return apply(f, out);
+#else
+ ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
}
+ return out; // shouldn't reach
}
/**
 * @brief In-place subtraction, expressed as add_i with a factor of -1
 * @param[in] m tensor forwarded to add_i
 * @return the status value returned by add_i
 */
int Tensor::subtract_i(Tensor const &m) {
  const int status = add_i(m, -1);
  return status;
}
return apply(f, out);
}
if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto f = [exponent](__fp16 in) { return powf(in, exponent); };
return apply(f, out);
+#else
+ ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
}
+ return out;
}
Tensor Tensor::getBatchSlice(size_t offset, unsigned int size) const {
ret_dims[i].setTensorDim(axis, sizes[i]);
}
- bool is_format_nchw = (dim.getFormat() == Tformat::NCHW);
+ bool is_format_nchw = (dim.getFormat() == Tformat::NCHW) ? true : false;
+ std::vector<Tensor> ret;
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
auto iter_value = [this, is_format_nchw](
return value;
};
- std::vector<Tensor> ret;
ret.reserve(num_size);
unsigned int accumulated_size = 0;
return iter_value(loc, end_loc, reset_dim_arr);
});
}
-
- return ret;
}
if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto iter_value =
[this, is_format_nchw](
std::array<size_t, 4> &loc, const std::array<size_t, 4> &end_loc,
return value;
};
- std::vector<Tensor> ret;
ret.reserve(num_size);
unsigned int accumulated_size = 0;
});
}
- return ret;
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+
+ return ret;
}
Tensor Tensor::cat(const std::vector<Tensor> &tensors, int axis) {
NNTR_THROW_IF(tensors.empty(), std::invalid_argument)
<< "given tensor vector is empty";
+ Tensor ret;
auto ref_dim = tensors.front().getDim();
bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
ref_dim.setTensorDim(axis, 1);
auto ret_dim = ref_dim;
ret_dim.setTensorDim(axis, axis_dim);
- auto ret = Tensor(ret_dim);
+ ret = Tensor(ret_dim);
std::array<unsigned, 4> loc = {0, 0, 0, 0};
for (auto &t : tensors) {
}
}
- return ret;
+ // return ret;
} else if (ref_dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
auto iter_value =
[is_format_nchw](std::array<unsigned, 4> &loc,
const std::array<unsigned, 4> &start_loc, Tensor &t,
auto ret_dim = ref_dim;
ret_dim.setTensorDim(axis, axis_dim);
- auto ret = Tensor(ret_dim);
+ ret = Tensor(ret_dim);
std::array<unsigned, 4> loc = {0, 0, 0, 0};
for (auto &t : tensors) {
}
}
- return ret;
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+ return ret;
}
void Tensor::makeSharedDataTensor(const Tensor &src, size_t offset) {
return apply_broadcast_util(m, v_func, output, this->computeBroadcastInfo(m));
}
+#ifdef ENABLE_FP16
void Tensor::apply_broadcast(
Tensor const &m,
std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
void Tensor::apply_broadcast_util(
Tensor const &m,
- std::function<void(const BroadcastInfo &e, const float *, const float *,
- float *)>
+ std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
+ __fp16 *)>
v_func,
Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset,
size_t m_offset) const {
- const float *buf = this->getData();
- const float *m_buf = m.getData();
- float *out_buf = output.getData();
+ const __fp16 *buf = this->getData<__fp16>();
+ const __fp16 *m_buf = m.getData<__fp16>();
+ __fp16 *out_buf = output.getData<__fp16>();
if (e.buffer_axis == cur_axis) {
v_func(e, buf + offset, m_buf + m_offset, out_buf + offset);
}
cur_axis++;
- uint continuity[4] = {0, 1, 2, 3};
- if (getFormat() == Tformat::NHWC) {
- continuity[1] = 2;
- continuity[2] = 3;
- continuity[3] = 1;
- }
- for (unsigned int i = 0; i < dim.getTensorDim(continuity[cur_axis]); ++i) {
+ for (unsigned int i = 0; i < dim.getTensorDim(cur_axis); ++i) {
size_t next_offset = offset + i * strides[cur_axis];
size_t next_m_offset = m_offset + i * e.strides[cur_axis];
apply_broadcast_util(m, v_func, output, e, cur_axis, next_offset,
}
}
+#endif
+
void Tensor::apply_broadcast_util(
Tensor const &m,
- std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
- __fp16 *)>
+ std::function<void(const BroadcastInfo &e, const float *, const float *,
+ float *)>
v_func,
Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset,
size_t m_offset) const {
- const __fp16 *buf = this->getData<__fp16>();
- const __fp16 *m_buf = m.getData<__fp16>();
- __fp16 *out_buf = output.getData<__fp16>();
+ const float *buf = this->getData();
+ const float *m_buf = m.getData();
+ float *out_buf = output.getData();
if (e.buffer_axis == cur_axis) {
v_func(e, buf + offset, m_buf + m_offset, out_buf + offset);
}
cur_axis++;
- for (unsigned int i = 0; i < dim.getTensorDim(cur_axis); ++i) {
+ uint continuity[4] = {0, 1, 2, 3};
+ if (getFormat() == Tformat::NHWC) {
+ continuity[1] = 2;
+ continuity[2] = 3;
+ continuity[3] = 1;
+ }
+ for (unsigned int i = 0; i < dim.getTensorDim(continuity[cur_axis]); ++i) {
size_t next_offset = offset + i * strides[cur_axis];
size_t next_m_offset = m_offset + i * e.strides[cur_axis];
apply_broadcast_util(m, v_func, output, e, cur_axis, next_offset,
sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
ones.getData<float>(), 1, 0.0, rdata, 1);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
__fp16 *rdata = ret.getData<__fp16>();
ones.setValue((__fp16)1.0);
sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
ones.getData<__fp16>(), 1, 0.0, rdata, 1);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return ret;
default:
throw std::out_of_range("Error: Dimension cannot exceed 3");
}
- return ret;
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
NNTR_THROW_IF(!contiguous, std::invalid_argument)
default:
throw std::out_of_range("Error: Dimension cannot exceed 3");
}
- return ret;
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+ return ret;
}
Tensor Tensor::sum(const std::vector<unsigned int> &axes, float alpha) const {
ldb, beta, rdata, ldc);
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
const __fp16 *mdata = m.getData<__fp16>();
__fp16 *rdata = result.getData<__fp16>();
sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, data, lda, mdata,
ldb, beta, rdata, ldc);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return result;
}
break;
}
- } else {
+ } else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *inptr = getData<__fp16>();
__fp16 *outptr = out.getData<__fp16>();
switch (indexI) {
}
break;
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return out;
data_[i] = 0.0;
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
__fp16 scale = 1.0 / (1 - dropout);
__fp16 *data_ = getData<__fp16>();
for (unsigned int i = 0; i < size(); ++i) {
else
data_[i] = 0.0;
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
std::fill(addr, addr + (*mask_len_val), en_mask_val);
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
for (unsigned int b = 0; b < batch(); b++) {
__fp16 *addr = getAddress<__fp16>(b, 0, 0, 0);
const uint *mask_len_val = mask_len.getAddress<uint>(b, 0, 0, 0);
std::fill(addr, addr + (*mask_len_val), (__fp16)en_mask_val);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
}
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
__fp16 zoneout_fp16 = (__fp16)zoneout;
opposite.setRandBernoulli(zoneout_fp16);
data[i] = (__fp16)1.0;
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
out.copyfmt(init);
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
unsigned int len = size();
out << "data addr: " << data << '\n';
}
out.copyfmt(init);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
NNTR_THROW_IF(!contiguous, std::invalid_argument)
<< getName() << "Tensor is not contiguous, cannot copy.";
- if (getDataType() == ml::train::TensorDim::DataType::FP16 &&
- buf == getData<__fp16>()) {
- return;
- } else if (getDataType() == ml::train::TensorDim::DataType::FP32 &&
- buf == getData()) {
- return;
+ if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ if (buf == getData<__fp16>()) {
+ return;
+ }
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+ } else if (getDataType() == ml::train::TensorDim::DataType::FP32) {
+ if (buf == getData()) {
+ return;
+ }
}
// std::string type_ =
// (getDataType() == ml::train::TensorDim::DataType::FP16) ? "FP16" : "NO";
// std::cout << type_ << std::endl;
if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
scopy(size(), (__fp16 *)buf, 1, getData<__fp16>(), 1);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
} else if (getDataType() == ml::train::TensorDim::DataType::FP32) {
scopy(size(), (float *)buf, 1, getData<float>(), 1);
}
}
}
}
- } else {
+ } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
for (unsigned int b = 0; b < batch(); ++b) {
for (unsigned int c = 0; c < channel(); ++c) {
for (unsigned int h = 0; h < height(); ++h) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
} else {
Tensor t = Tensor(from.getDim(), true);
}
}
}
- } else {
+ } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
for (unsigned int b = 0; b < batch(); ++b) {
for (unsigned int c = 0; c < channel(); ++c) {
for (unsigned int h = 0; h < height(); ++h) {
}
}
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
swap(t, *this);
}
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
copy(from.getData());
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
copy(from.getData<__fp16>());
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
} else {
float *data = getData<float>();
std::fill(data, data + size(), val);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
__fp16 *data = getData<__fp16>();
std::fill(data, data + size(), (__fp16)val);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
else
apply_i([](float val) -> float { return 0; });
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
if (contiguous)
sscal(size(), 0, getData<__fp16>(), 1);
else
apply_i([](__fp16 val) -> __fp16 { return 0; });
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
}
}
if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
size_t batch_size = batch();
size_t feature_len = dim.getFeatureLen();
std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
result[b] = std::distance(data, max_iter) - (b * feature_len);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return result;
float Tensor::l2norm() const {
NNTR_THROW_IF(!contiguous, std::invalid_argument)
<< getName() << " is not contiguous, cannot get l2norm.";
-
+ float ret;
unsigned int len = size();
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
const float *data = getData<float>();
- return snrm2(len, data, 1);
+ ret = snrm2(len, data, 1);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
- return snrm2(len, data, 1);
+ ret = snrm2(len, data, 1);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+ return ret;
}
float Tensor::max_abs() const {
<< getName() << " is not contiguous, cannot get max_abs.";
unsigned int len = size();
+ float ret;
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
const float *data = getData<float>();
unsigned int idx = isamax(len, data, 1);
- return *(data + idx);
+ ret = *(data + idx);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
unsigned int idx = isamax(len, data, 1);
- return *(data + idx);
+ ret = *(data + idx);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+ return ret;
}
Tensor &Tensor::normalization(Tensor &output) const {
this->divide_i(max - min);
}
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
const __fp16 *data = getData<__fp16>();
auto bounds = std::minmax_element(data, data + size());
this->subtract_i(min);
this->divide_i(max - min);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
std_dev_by_batch.divide_i(dim.getFeatureLen());
this->divide_i(std_dev_by_batch);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
Tensor std_dev_by_batch(dim.batch(), 1, 1, 1);
std_dev_by_batch.setZero();
__fp16 *std_dev = std_dev_by_batch.getData<__fp16>();
std_dev_by_batch.divide_i(dim.getFeatureLen());
this->divide_i(std_dev_by_batch);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
}
}
}
- return output;
} else if (in.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
output.setZero();
for (unsigned int i = 0; i < in.batch(); ++i) {
for (unsigned int j = 0; j < in.channel(); ++j) {
}
}
}
- return output;
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
+ return output;
}
} /* namespace nntrainer */