#include <iostream>
#include <memory_data.h>
#include <nntrainer_error.h>
+#include <nntrainer_log.h>
#include <tensor_dim.h>
#include <util_func.h>
ml::train::TensorDim::TensorType t_type) :
Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+#ifdef ENABLE_FP16
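+ /**
+  * @brief Constructor of Tensor; __fp16 mirror of the float overload above,
+  * compiled only when the enable-fp16 build option is set.
+  * @param[in] d data for the Tensor
+  * @param[in] t_type Tensor Type
+  */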
Tensor(std::vector<std::vector<std::vector<std::vector<__fp16>>>> const &d,
ml::train::TensorDim::TensorType t_type) :
Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+#endif
+
/**
* @brief Copy constructor of Tensor.
* @param[in] Tensor &
}
}
} else if (dim.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
if (contiguous && output.contiguous) {
const __fp16 *data = (getData<__fp16>());
__fp16 *rdata = (output.getData<__fp16>());
}
}
}
+#else
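+ // FP16 tensors cannot be handled in a build without fp16 support; fail loudly.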
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
-
return output;
}
if (getDataType() == Tdatatype::FP32) {
getData<float>()[getIndex(batch, c, h, w)] = value;
} else if (getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
getData<__fp16>()[getIndex(batch, c, h, w)] = value;
+#else
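+ // In a build without fp16 support the error is only logged here, not thrown.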
+ ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
}
}
getData<float>()[idx] *= beta;
getData<float>()[idx] += value;
} else if (dim.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
getData<__fp16>()[idx] *= beta;
getData<__fp16>()[idx] += value;
+#else
+ ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
}
}
return nullptr;
data->validate();
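+ // getAddr<T>() already returns T *, so the extra C-style cast was redundant.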
- return (T *)((data->getAddr<T>()) + offset);
+ return data->getAddr<T>() + offset;
}
/**
return nullptr;
data->validate();
- return (T *)(data->getAddr<T>() + offset);
+ return data->getAddr<T>() + offset;
}
/**
if (!data)
return nullptr;
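+ // getAddr<T>() returns T *, so pointer arithmetic already advances by
+ // sizeof(T) per element; scaling idx by sizeof(T) again indexed far past
+ // the intended element.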
- size_t index = idx * sizeof(T);
+ size_t index = idx;
data->validate();
- return (T *)(data->getAddr<T>() + offset + index);
+ return data->getAddr<T>() + offset + index;
}
void setDataType(Tdatatype d_type) { dim.setDataType(d_type); }
/**
* @brief return offset
*/
- unsigned int getOffset() const { return offset; }
+ size_t getOffset() const { return offset; }
/**
* @brief set the memory data buffer for the tensor
* @param buf the memory buffer
* @param off element offset of this tensor within the buffer
* @param init initialize the buffer
*/
- void setData(const std::shared_ptr<MemoryData> buf, unsigned int off = 0,
+ void setData(const std::shared_ptr<MemoryData> buf, size_t off = 0,
bool init = false) {
if (buf) {
data = buf;
Tensor::Initializer initializer;
std::string name; /**< name of the tensor */
std::shared_ptr<MemoryData> data;
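+ // offset is counted in elements of the tensor's data type; size_t keeps
+ // very large tensors from overflowing the old unsigned int range.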
- unsigned int offset;
+ size_t offset;
/**<
* When using shared_data with tensor, this stores the ptr of the source
int cur_axis = -1, size_t offset = 0,
size_t m_offset = 0) const;
- void apply_broadcast_util(
- Tensor const &m,
- std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
- __fp16 *)>
- v_func,
- Tensor &output, const BroadcastInfo &e, int cur_axis = -1,
- size_t offset = 0, size_t m_offset = 0) const;
-
/**
* @brief Applies the given operator to the tensor with the passed argument
*
* @param[in] m Tensor to broadcast against
* @param[in] v_func vectorized function to apply
* @param[out] output Tensor to store the result
*/
void
apply_broadcast(Tensor const &m,
std::function<void(const BroadcastInfo &e, const float *,
const float *, float *)>
v_func,
Tensor &output) const;
+#ifdef ENABLE_FP16
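+ /**
+  * @brief __fp16 variant of apply_broadcast_util, declared only when fp16
+  * support is compiled in.
+  */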
+ void apply_broadcast_util(
+ Tensor const &m,
+ std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
+ __fp16 *)>
+ v_func,
+ Tensor &output, const BroadcastInfo &e, int cur_axis = -1,
+ size_t offset = 0, size_t m_offset = 0) const;
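+ /**
+  * @brief __fp16 variant of apply_broadcast; see the float overload above.
+  */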
void
apply_broadcast(Tensor const &m,
std::function<void(const BroadcastInfo &e, const __fp16 *,
const __fp16 *, __fp16 *)>
v_func,
Tensor &output) const;
-
+#endif
/**
* @brief compute Loop info for broadcasting and vectorization
*