/**
 * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *   http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @date   04 December 2019
 * @brief  This is Tensor class for calculation
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug    No known bugs except for NYI items
 * @todo   deprecate new tensor allocation for out of place operations.
 */
36 #include <blas_interface.h>
38 #include <memory_data.h>
39 #include <nntrainer_error.h>
40 #include <nntrainer_log.h>
41 #include <tensor_dim.h>
42 #include <util_func.h>
/**
 * @brief EXCEPT_WHEN_DEBUG expands to @c noexcept in release builds; in debug
 *        builds it expands to nothing so bounds checks are free to throw.
 *        (The two unconditional, conflicting definitions were a macro
 *        redefinition; the DEBUG guard restores the intended selection.)
 */
#ifdef DEBUG
#define EXCEPT_WHEN_DEBUG
#else
#define EXCEPT_WHEN_DEBUG noexcept
#endif
50 #define MAKE_SHARED_TENSOR(...) std::make_shared<nntrainer::Tensor>(__VA_ARGS__)
/**
 * @brief Re-initialize @a tensor from @a __VA_ARGS__ only when it is empty;
 *        a non-empty tensor is left untouched. do-while keeps the macro a
 *        single statement so it nests safely inside if/else.
 */
#define CREATE_IF_EMPTY_DIMS(tensor, ...) \
  do {                                    \
    if (tensor.empty())                   \
      tensor = Tensor(__VA_ARGS__);      \
  } while (0);
60 using TensorDim = ml::train::TensorDim;
61 using Tformat = ml::train::TensorDim::Format;
62 using Tdatatype = ml::train::TensorDim::DataType;
65 class SrcSharedTensor;
68 * @class Tensor Class for Calculation
69 * @brief Tensor Class for Calculation
/**
 * @brief Enumeration of Weight Initialization Type
 * @todo  support initialization from file
 */
enum class Initializer {
  ZEROS,          /**< Zero initialization */
  ONES,           /**< One initialization */
  LECUN_NORMAL,   /**< LeCun normal initialization */
  LECUN_UNIFORM,  /**< LeCun uniform initialization */
  XAVIER_NORMAL,  /**< Xavier normal initialization */
  XAVIER_UNIFORM, /**< Xavier uniform initialization */
  HE_NORMAL,      /**< He normal initialization */
  HE_UNIFORM,     /**< He uniform initialization */
  NONE            /**< No initialization */
};
90 * @brief Basic Constructor of Tensor
92 Tensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
93 Tdatatype d_type = Tdatatype::FP32) :
94 dim(TensorDim(fm, d_type)),
95 strides(dim.computeStrides()),
97 initializer(Initializer::NONE),
104 * @brief Constructor of Tensor with dimension, possibly lazily
105 * @param d Tensor dim for this tensor
106 * @param alloc_now If the memory of the tensor must be allocated
107 * @param init Initializer for the tensor
108 * @param name Name of the tensor
110 Tensor(const TensorDim &d, bool alloc_now,
111 Initializer init = Initializer::NONE, std::string name = "");
114 * @brief Constructor of Tensor with dimension/buf
115 * @param d Tensor dim for this tensor
117 * @note Memory for this tensor is instantaneously allocated
119 Tensor(const TensorDim &d, const void *buf = nullptr);
122 * @brief Constructor of Tensor
123 * @param[in] d0 Batch of Tensor
124 * @param[in] d1 Channel
125 * @param[in] d2 Height
126 * @param[in] d3 Width
128 Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
129 Tdatatype d_type = Tdatatype::FP32) :
130 Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
133 * @brief Constructor of Tensor
134 * @param[in] d1 Channel
135 * @param[in] d2 Height
136 * @param[in] d3 Width
138 Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
139 Tdatatype d_type = Tdatatype::FP32) :
140 Tensor(1, d1, d2, d3, fm, d_type){};
143 * @brief Constructor of Tensor with batch size one and d1 size one
144 * @param[in] d2 Height (NCHW) or Width (NHWC)
145 * @param[in] d3 Width (NCHW) or Channel (NHWC)
147 Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
148 Tdatatype d_type = Tdatatype::FP32) :
149 Tensor(1, 1, d2, d3, fm, d_type){};
152 * @brief Constructor of Tensor with just Width or Channel
153 * @param[in] d3 Width (NCHW) or Channel (NHWC)
155 explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
156 Tdatatype d_type = Tdatatype::FP32) :
157 Tensor(1, 1, 1, d3, fm, d_type){};
160 * @brief Constructor of Tensor
161 * @param[in] d0 Batch of Tensor
162 * @param[in] d1 Channel (NCHW) or Height (NHWC)
163 * @param[in] d2 Height (NCHW) or Width (NHWC)
164 * @param[in] d3 Width (NCHW) or Channel (NHWC)
166 Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
167 ml::train::TensorDim::TensorType t_type) :
168 Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
171 * @brief Constructor of Tensor
172 * @param[in] d1 Channel
173 * @param[in] d2 Height
174 * @param[in] d3 Width
176 Tensor(size_t d1, size_t d2, size_t d3,
177 ml::train::TensorDim::TensorType t_type) :
178 Tensor(1, d1, d2, d3, t_type){};
181 * @brief Constructor of Tensor with batch size one and d1 size one
182 * @param[in] d2 Height (NCHW) or Width (NHWC)
183 * @param[in] d3 Width (NCHW) or Channel (NHWC)
185 Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
186 Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
187 (t_type.format == Tformat::NCHW) ? d2 : 1,
188 (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
190 * @brief Constructor of Tensor with just Width or Channel
191 * @param[in] d3 Width (NCHW) or Channel (NHWC)
193 explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
194 Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
195 (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
198 * @brief Constructor of Tensor
199 * @param[in] d data for the Tensor. It needs to set format properly.
202 Tensor(std::vector<std::vector<std::vector<std::vector<float>>>> const &d,
203 ml::train::TensorDim::TensorType t_type) {
204 if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
205 throw std::out_of_range(
206 "[Tensor] trying to initialize Tensor from empty vector");
208 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
209 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
210 // dim[1] == height, dim[2] == width, dim[3] == channel
211 dim.setTensorDim(0, d.size());
212 if (t_type.format == Tformat::NCHW) {
213 dim.setTensorDim(1, d[0].size());
214 dim.setTensorDim(2, d[0][0].size());
215 dim.setTensorDim(3, d[0][0][0].size());
217 dim.setTensorDim(2, d[0].size());
218 dim.setTensorDim(3, d[0][0].size());
219 dim.setTensorDim(1, d[0][0][0].size());
222 setTensorType(t_type);
224 strides = dim.computeStrides();
226 MemoryData *mem_data =
227 new MemoryData((void *)(new float[dim.getDataLen()]()));
228 data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
229 delete[] mem_data->getAddr<float>();
233 initializer = Initializer::NONE;
235 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
236 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
237 // dim[1] == height, dim[2] == width, dim[3] == channel
238 if (t_type.format == Tformat::NCHW) {
239 for (unsigned int i = 0; i < batch(); ++i)
240 for (unsigned int j = 0; j < channel(); ++j)
241 for (unsigned int k = 0; k < height(); ++k)
242 for (unsigned int l = 0; l < width(); ++l)
243 this->setValue(i, j, k, l, d[i][j][k][l]);
245 for (unsigned int i = 0; i < batch(); ++i)
246 for (unsigned int j = 0; j < height(); ++j)
247 for (unsigned int k = 0; k < width(); ++k)
248 for (unsigned int l = 0; l < channel(); ++l)
249 this->setValue(i, l, j, k, d[i][j][k][l]);
254 * @brief Constructor of Tensor
255 * @note This constructor copies vector again. needs refactoring
256 * @param[in] d data for the Tensor. It needs to set format properly.
258 Tensor(std::vector<std::vector<std::vector<float>>> const &d,
259 ml::train::TensorDim::TensorType t_type) :
260 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
263 * @brief Constructor of Tensor
264 * @note This constructor copies vector again. needs refactoring
265 * @param[in] d data for the Tensor with batch size one
267 Tensor(std::vector<std::vector<float>> const &d,
268 ml::train::TensorDim::TensorType t_type) :
269 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
272 Tensor(std::vector<std::vector<std::vector<std::vector<__fp16>>>> const &d,
273 ml::train::TensorDim::TensorType t_type) {
275 if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
276 throw std::out_of_range(
277 "[Tensor] trying to initialize Tensor from empty vector");
280 dim.setTensorDim(0, d.size());
281 if (t_type.format == Tformat::NCHW) {
282 dim.setTensorDim(1, d[0].size());
283 dim.setTensorDim(2, d[0][0].size());
284 dim.setTensorDim(3, d[0][0][0].size());
286 dim.setTensorDim(2, d[0].size());
287 dim.setTensorDim(3, d[0][0].size());
288 dim.setTensorDim(1, d[0][0][0].size());
291 setTensorType(t_type);
293 strides = dim.computeStrides();
295 MemoryData *mem_data =
296 new MemoryData((void *)(new __fp16[dim.getDataLen()]()));
297 data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
298 delete[] mem_data->getAddr<__fp16>();
302 initializer = Initializer::NONE;
304 setDataType(Tdatatype::FP16);
306 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
307 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
308 // dim[1] == height, dim[2] == width, dim[3] == channel
309 if (t_type.format == Tformat::NCHW) {
310 for (unsigned int i = 0; i < batch(); ++i)
311 for (unsigned int j = 0; j < channel(); ++j)
312 for (unsigned int k = 0; k < height(); ++k)
313 for (unsigned int l = 0; l < width(); ++l)
314 this->setValue(i, j, k, l, d[i][j][k][l]);
316 for (unsigned int i = 0; i < batch(); ++i)
317 for (unsigned int j = 0; j < height(); ++j)
318 for (unsigned int k = 0; k < width(); ++k)
319 for (unsigned int l = 0; l < channel(); ++l)
320 this->setValue(i, l, j, k, d[i][j][k][l]);
325 * @brief Constructor of Tensor
326 * @note This constructor copies vector again. needs refactoring
327 * @param[in] d data for the Tensor
329 Tensor(std::vector<std::vector<std::vector<__fp16>>> const &d,
330 ml::train::TensorDim::TensorType t_type) :
331 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
334 * @brief Constructor of Tensor
335 * @note This constructor copies vector again. needs refactoring
336 * @param[in] d data for the Tensor with batch size one
338 Tensor(std::vector<std::vector<__fp16>> const &d,
339 ml::train::TensorDim::TensorType t_type) :
340 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
345 * @brief Copy constructor of Tensor.
346 * @param[in] Tensor &
348 Tensor(const Tensor &rhs) = default;
351 * @brief Move constructor of Tensor.
352 * @param[in] Tensor &&
354 Tensor(Tensor &&rhs) noexcept = default;
357 * @brief Copy assignment operator.
358 * @param[in] rhs Tensor to be copied.
360 Tensor &operator=(const Tensor &rhs) = default;
363 * @brief Move assignment operator.
364 * @parma[in] rhs Tensor to be moved.
366 Tensor &operator=(Tensor &&rhs) noexcept = default;
369 * @brief Construct a new Tensor object from a buffer
370 * This will not copy buffer to a new tensor but directly uses it
373 * @param bytes buffer size in bytes
374 * @param d tensor dim
375 * @param offset offset to be used from current
376 * @return Tensor object
377 * @throws std::invalid_argument if buf is null
379 template <typename T = float>
380 static Tensor Map(T *buf, unsigned int bytes, const TensorDim &d,
382 if (d.getDataLen() == 0 || buf == nullptr) {
383 throw std::invalid_argument(
384 "[Tensor::Map] empty tensor dim is not allowed");
387 if (d.getDataLen() * sizeof(T) + offset > bytes) {
388 throw std::invalid_argument(
389 "Creating shared tensor of size bigger than tensor memory.");
394 tmp.strides = d.computeStrides();
395 /// Tensor does not own the memory
396 tmp.data = std::shared_ptr<MemoryData>(new MemoryData((void *)buf),
397 std::default_delete<MemoryData>());
403 friend void swap(Tensor &lhs, Tensor &rhs) noexcept {
404 std::swap(lhs.dim, rhs.dim);
405 std::swap(lhs.strides, rhs.strides);
406 std::swap(lhs.contiguous, rhs.contiguous);
407 std::swap(lhs.initializer, rhs.initializer);
408 std::swap(lhs.data, rhs.data);
409 std::swap(lhs.name, rhs.name);
413 * @brief Comparison operator overload
414 * @param[in] rhs Tensor to be compared with
416 bool operator==(const Tensor &rhs) const;
419 * @brief Comparison operator overload
420 * @param[in] rhs Tensor to be compared with
422 bool operator!=(const Tensor &rhs) const { return !(*this == rhs); }
425 * @brief Allocate memory for this tensor
430 * @brief Deallocate memory for this tensor
431 * @note This will not necessary free the memory as tensors share memory
439 * @brief Check if the tensor has memory allocated/assigned/associated
441 bool isAllocated() const { return data != nullptr; }
444 * @brief return value at specific location
445 * @param[in] batch batch location
446 * @param[in] c channel location
447 * @param[in] h height location
448 * @param[in] w width location
450 template <typename T = float>
451 const T &getValue(unsigned int batch, unsigned int c, unsigned int h,
452 unsigned int w) const noexcept {
453 return getValue<T>(getIndex(batch, c, h, w));
456 template <typename T = float>
457 T &getValue(unsigned int batch, unsigned int c, unsigned int h,
458 unsigned int w) noexcept {
459 return getValue<T>(getIndex(batch, c, h, w));
463 * @brief return value at specific location
464 * @param[in] idx location
466 template <typename T = float>
467 const T &getValue(unsigned int idx) const noexcept {
468 return getData<T>()[idx];
472 * @brief return value at specific location
473 * @param[in] idx location
475 template <typename T = float> T &getValue(unsigned int idx) noexcept {
476 return getData<T>()[idx];
480 * @brief Get the Value thinking that it is padded
481 * for example, for the tensor (virtually padded) below,
482 * getValue(0, 0, 2, 2, 1, 1, .0f) will return 5
483 * padding available for height and width axis for now
489 * @param b batch index
490 * @param c channel index
491 * @param h height index
492 * @param w width index
493 * @param ph padding height
494 * @param pw padding width
495 * @return float value
497 template <typename T = float>
498 const T getValuePaddedVirtual(unsigned int b, unsigned int c, unsigned int h,
499 unsigned int w, unsigned int ph,
501 T pad_value = 0) const EXCEPT_WHEN_DEBUG {
503 unsigned int padded_h = 2 * ph + h;
504 unsigned int padded_w = 2 * pw + w;
505 if (h > padded_h && w > padded_w) {
506 throw std::out_of_range(
507 "[Tensor::getValuePadded] trying to access out of range");
511 if (ph <= h && h < ph + height() && pw <= w && w < pw + width()) {
512 return getValue<T>(b, c, h - ph, w - pw);
519 * @brief Multiply value element by element immediately
520 * @param[in] value multiplier
521 * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right
522 * @retval #ML_ERROR_NONE Successful
524 int multiply_i(float const &value);
527 * @brief Multiply value element by element
528 * @param[in] value multiplier
529 * @retval Calculated Tensor
531 Tensor multiply(float const &value) const;
534 * @brief multiply value element by element
535 * @param[in] value multiplier
536 * @param[out] out out tensor to store the result
537 * @retval Calculated Tensor
539 Tensor &multiply(float const &value, Tensor &out) const;
542 * @brief Multiply Tensor Elementwise
543 * @param[in] m Tensor to be multiplied
544 * @param[in] beta scalar to multiply output with and add
545 * @retval #ML_ERROR_NONE successful
547 int multiply_i(Tensor const &m, const float beta = 0.0);
550 * @brief Multiply Tensor Element by Element ( Not the MxM )
551 * @param[in] m Tensor to be multiplied
552 * @param[in] beta scalar to multiply output with and add
553 * @retval Calculated Tensor
555 Tensor multiply(Tensor const &m, const float beta = 0.0) const;
558 * @brief Multiply Tensor Element by Element ( Not the MxM )
559 * @param[in] m Tensor to be multiplied
560 * @param[out] output Tensor to store the result
561 * @param[in] beta scalar to multiply output with and add
562 * @retval Calculated Tensor
564 Tensor &multiply(Tensor const &m, Tensor &output,
565 const float beta = 0.0) const;
568 * @brief Multiply Tensor Elementwise
569 * @param[in] m Tensor to be multiplied
570 * @param[in] beta scalar to multiply output with and add
571 * @retval #ML_ERROR_NONE successful
573 * @note support different strided inputs and output
574 * @note does not support broadcasting
576 * @todo merge this to multiply_i
578 int multiply_i_strided(Tensor const &m, const float beta = 0.0);
581 * @brief Multiply Tensor Element by Element ( Not the MxM )
582 * @param[in] m Tensor to be multiplied
583 * @param[in] beta scalar to multiply output with and add
584 * @retval Calculated Tensor
586 * @note support different strided inputs and output
587 * @note does not support broadcasting
589 * @todo merge this to multiply
591 Tensor multiply_strided(Tensor const &m, const float beta = 0.0) const;
594 * @brief Multiply Tensor Element by Element ( Not the MxM )
595 * @param[in] m Tensor to be multiplied
596 * @param[out] output Tensor to store the result
597 * @param[in] beta scalar to multiply output with and add
598 * @retval Calculated Tensor
600 * @note support different strided inputs and output
601 * @note does not support broadcasting
603 * @todo merge this to multiply
605 Tensor &multiply_strided(Tensor const &m, Tensor &output,
606 const float beta = 0.0) const;
609 * @brief Add Tensor Elementwise
610 * @param[in] m Tensor to be added
611 * @param[in] beta scalar to add output with and add
612 * @retval #ML_ERROR_NONE successful
614 * @note support different strided inputs and output
615 * @note does not support broadcasting
617 * @todo merge this to add_i
619 int add_i_strided(Tensor const &m, const float beta = 0.0);
622 * @brief Add Tensor Element by Element
623 * @param[in] m Tensor to be added
624 * @param[in] beta Value to be scale the added tensor
625 * @retval Calculated Tensor
627 * @note support different strided inputs and output
628 * @note does not support broadcasting
630 * @todo merge this to add
632 Tensor add_strided(Tensor const &m, const float beta = 0.0) const;
635 * @brief Add Tensor Element by Element
636 * @param[in] m Tensor to be added
637 * @param[out] output Tensor to store the result
638 * @param[in] beta Value to be scale the added tensor
639 * @retval Calculated Tensor
641 * @note support different strided inputs and output
642 * @note does not support broadcasting
644 * @todo merge this to add
646 Tensor &add_strided(Tensor const &m, Tensor &output,
647 const float beta = 0.0) const;
650 * @brief Divide value element by element immediately
651 * @param[in] value divisor
652 * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right
653 * @retval #ML_ERROR_NONE Successful
655 int divide_i(float const &value);
658 * @brief Divide value element by element
659 * @param[in] value Divisor
660 * @retval Calculated Tensor
662 Tensor divide(float const &value) const;
665 * @brief Divide value element by element
666 * @param[in] value Divisor
667 * @param[out] out out parameter to store the result
668 * @retval Calculated Tensor
670 Tensor ÷(float const &value, Tensor &out) const;
673 * @brief divide Tensor Elementwise
674 * @param[in] m Tensor to be multiplied
675 * @retval #ML_ERROR_NONE successful
677 int divide_i(Tensor const &m);
680 * @brief Divide Tensor Element by Element
681 * @param[in] m Divisor Tensor
682 * @retval Calculated Tensor
684 Tensor divide(Tensor const &m) const;
687 * @brief divide Tensor Elementwise
688 * @param[in] m Tensor to be multiplied
689 * @param[out] output Tensor to store the result
690 * @retval Calculated Tensor
692 Tensor ÷(Tensor const &m, Tensor &output) const;
695 * @brief Add Tensor Element immediately to target tensor without mem copy
696 * @param[in] value value to be added
697 * @retval #ML_ERROR_NONE Successful
698 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
700 int add_i(float const &value);
703 * @brief Add value Element by Element
704 * @param[in] value value to be added
705 * @retval Calculated Tensor
707 Tensor add(float const &value) const;
710 * @brief Add Tensor Element by Element
711 * @param[in] value value to be added
712 * @param[out] out Tensor to save output without allocating new memory
713 * @retval Calculated Tensor
715 Tensor &add(float const &value, Tensor &out) const;
718 * @brief Add Tensor Element by Element without mem copy
719 * @param[in] m Tensor to be added
720 * @param[out] alpha Values to be scaled
721 * @retval #ML_ERROR_NONE Successful
722 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
724 int add_i(Tensor const &m, float const alpha = 1);
727 * @brief Add Tensor Element by Element
728 * @param[in] m Tensor to be added
729 * @retval Calculated Tensor
731 Tensor add(Tensor const &m, float const alpha = 1) const;
734 * @brief Add Tensor Element by Element
735 * @param[in] m Tensor to be added
736 * @param[out] m Tensor to be out
737 * @retval Calculated Tensor
739 Tensor &add(Tensor const &m, Tensor &out, float const alpha = 1) const;
742 * @brief memcpyless version of subtract
743 * @param[in] value value to subtract
744 * @retval #ML_ERROR_NONE Successful
745 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
747 int subtract_i(float const &value);
750 * @brief subtract value Element by Element
751 * @param[in] value value to be subtracted
752 * @retval Calculated Tensor
754 Tensor subtract(float const &value) const;
757 * @brief Subtract Tensor Element by Element
758 * @param[in] value value to be added
759 * @param[out] out Tensor to save output without allocating new memory
760 * @retval Calculated Tensor
762 Tensor &subtract(float const &value, Tensor &out) const;
765 * @brief memcpyless version of subtract
766 * @param[in] m Tensor to be subtracted
767 * @retval #ML_ERROR_NONE Successful
768 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
770 int subtract_i(Tensor const &m);
773 * @brief Substract Tensor Element by Element
774 * @param[in] m Tensor to be subtracted
775 * @retval Calculated Tensor
777 Tensor subtract(Tensor const &m) const;
780 * @brief Subtract Tensor Element by Element
781 * @param[in] m Tensor to be added
782 * @param[out] m Tensor to be out
783 * @retval Calculated Tensor
785 Tensor &subtract(Tensor const &m, Tensor &out) const;
788 * @brief Tensor power elementwise
790 * @param exponent exponent
791 * @return int ML_ERROR_NONE if successful
793 int pow_i(float exponent);
796 * @brief Tensor power Element by Element
797 * @param[in] exponent exponent
798 * @retval Calculated Tensor
800 Tensor pow(float exponent) const;
803 * @brief Tensor power Element by Element
804 * @param[in] exponent exponent
805 * @param[out] out out to store the result
806 * @retval Calculated Tensor
808 Tensor &pow(float exponent, Tensor &out) const;
811 * @brief gaussian error function
812 * @return int ML_ERROR_NONE if successful
817 * @brief gaussian error function
818 * @retval Calculated Tensor
823 * @brief gaussian error function
824 * @param[out] out out to store the result
825 * @retval Calculated Tensor
827 Tensor &erf(Tensor &out) const;
829 unsigned int sizeofData() { return dim.getDataTypeSize(); }
832 * @brief Dot Product of Tensor ( equal MxM )
833 * @details This applies dot of the last dimension of this and second-last
834 * dimension of passed tensor m.
835 * @param[in] m Tensor
836 * @param[in] trans Transpose
837 * @param[in] trans_m Transpose m
838 * @retval Calculated Tensor
840 Tensor dot(Tensor const &m, bool trans = false, bool trans_m = false) const;
843 * @brief Dot Product of Tensor ( equal MxM )
844 * @details This applies dot of the last dimension of this and second-last
845 * dimension of passed tensor m.
846 * @param[in] m Tensor
847 * @param[in] output output Tensor
848 * @param[in] trans Transpose
849 * @param[in] trans_m Transpose m
850 * @param[in] beta beta
851 * @retval Calculated Tensor
853 Tensor &dot(Tensor const &m, Tensor &output, bool trans = false,
854 bool trans_m = false, float beta = 0.0f) const;
857 * @brief compute the derivative of this in the current tensor
858 * @param m same as given to the dot()
859 * @param output_deriv the derivative of the output
860 * @param[in] trans same as given to the dot()
861 * @param[in] trans_m same as given to the dot()
862 * @param[in] beta same as given to the dot()
863 * @note This will compute the derivative in-place and will overwrite existing
866 Tensor &dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv,
867 bool trans = false, bool trans_m = false,
871 * @brief compute the derivative wrt m in the m tensor
872 * @param m_deriv tensor where derivative wrt m will be stored
873 * @param output_deriv the derivative of the output
874 * @param[in] trans same as given to the dot()
875 * @param[in] trans_m same as given to the dot()
876 * @param[in] beta same as given to the dot()
877 * @note The caller tensor must be the same tensor as the one which called the
880 Tensor &dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv,
881 bool trans = false, bool trans_m = false,
882 float beta = 0.0f) const;
885 * @copydoc Tensor::dot(Tensor const &m, Tensor &output, bool trans,
886 bool trans_m, float beta) const
887 * @details performs dot operation over a batch of inputs
889 Tensor &dotBatched(Tensor const &m, Tensor &result, bool trans = false,
890 bool trans_m = false, float beta = 0.0f) const;
893 * @copydoc Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const
894 &output_deriv, bool trans, bool trans_m, float beta)
896 Tensor &dot_batched_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv,
897 bool trans = false, bool trans_m = false,
901 * @brief Tensor::dot_deriv_wrt_2(Tensor const &m_deriv, Tensor const
902 &output_deriv, bool trans, bool trans_m, float beta) const
904 Tensor &dot_batched_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv,
905 bool trans = false, bool trans_m = false,
906 float beta = 0.0f) const;
909 * @brief Transpose Tensor
911 * @param direction to transpose ex) 0:2:1
914 Tensor transpose(const std::string &direction) const;
917 * @brief Transpose Tensor
918 * @param direction to transpose ex) 0:2:1
919 * @param[out] Tensor to save to, dimension is always reshaped.
920 * @retval Tensor& reference to the out
922 Tensor &transpose(const std::string &direction, Tensor &out) const;
925 * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate)
926 * @param dropout drop out rate
927 * @retval Tensor& reference of drop out mask
929 Tensor dropout_mask(float dropout) const;
932 * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace
933 * @param dropout drop out rate
935 void dropout_mask(float dropout);
938 * @brief Calculate filter mask
939 * @param mask_len length of each mask along the last axis
940 * @param invert invert the mask
942 void filter_mask(const Tensor &mask_len, bool reverse = false);
945 * @brief Calculate 2 Zone Out Mask
946 * @details Calculate zone out mask according to the bernoulli distribution.
947 * Zone out mask with rate @a zoneout for inplace and the other zone out mask
948 * with rate @a (1-zoneout).
949 * @param zoneout zone out rate
950 * @retval Tensor zone out mask for opposite tensor
952 Tensor zoneout_mask(float zoneout);
955 * @brief Calculate 2 Zone Out Mask
956 * @details Calculate zone out mask according to the bernoulli distribution.
957 * Zone out mask with rate @a zoneout for inplace and the other zone out mask
958 * with rate @a (1-zoneout).
959 * @param opposite opposite zone out mask
960 * @param zoneout zone out rate
962 void zoneout_mask(Tensor &opposite, float zoneout);
965 * @brief sum all the Tensor elements according to the batch
966 * @retval Calculated Tensor(batch, 1, 1, 1)
968 Tensor sum_by_batch() const;
971 * @brief sum all the Tensor elements according to the axis
972 * 0 : batch direction
973 * 1 : channel direction
974 * 2 : height direction
975 * 3 : width direction
976 * @param[in] axis Axis to calculate sum along
977 * @param[in] alpha Scale the sum by this value
978 * @retval Calculated Tensor
980 Tensor sum(unsigned int axis, float alpha = 1.0) const;
983 * @brief sum all the Tensor elements according to the axis
984 * 0 : batch direction
985 * 1 : channel direction
986 * 2 : height direction
987 * 3 : width direction
988 * @param[in] axis Axis to calculate sum along
989 * @param[out] output output tensor
990 * @param[in] alpha Scale the sum by this value
991 * @retval Calculated Tensor
993 Tensor &sum(unsigned int axis, Tensor &output, float alpha = 1.0,
994 float beta = 0.0) const;
997 * @brief sum all the Tensor by multiple axes
999 * @param axes axes to sum along
1000 * @param alpha Scale the sum by this value
1003 Tensor sum(const std::vector<unsigned int> &axes, float alpha = 1.0) const;
1006 * @brief sum all the Tensor by multiple axes
1008 * @param axes axes to sum along
1009 * @param[out] output output tensor
1010 * @param alpha Scale the sum by this value
1013 Tensor &sum(const std::vector<unsigned int> &axes, Tensor &output,
1014 float alpha = 1.0) const;
1017 * @brief Averaging the Tensor elements according to the axis
1018 * 0 : batch direction
1019 * 1 : channel direction
1020 * 2 : height direction
1021 * 3 : width direction
1022 * @retval Calculated Tensor
1024 Tensor average(unsigned int axis) const;
1026 * @brief Averaging the Tensor elements according to the axis
1028 * @retval Calculated Tensor
1030 Tensor &average(unsigned int axis, Tensor &output) const;
1033 * @brief average all the Tensor by multiple axes
1035 * @param axes axes to sum along
1038 Tensor average(const std::vector<unsigned int> &axes) const;
1041 * @brief average all the Tensor by multiple axes
1043 * @param axes axes to sum along
1044 * @param output output tensor
1047 Tensor &average(const std::vector<unsigned int> &axes, Tensor &output) const;
1050 * @brief Averaging the Tensor elements by all axis
1051 * @retval Calculated Tensor
1053 Tensor average() const;
1056 * @brief Averaging the Tensor elements by all axis
1057 * @retval Calculated Tensor
1059 Tensor &average(Tensor &output) const;
1062 * @brief Anchor a starting point to defer following evaluation
1063 * @retval LazyTensor class that can be used with run();
1065 LazyTensor chain() const;
1068 * @brief Softmax the Tensor elements
1069 * @retval Calculated Tensor
1071 Tensor softmax() const;
1074 * @brief l2norm the Tensor elements
1075 * @retval Calculated l2norm
1077 float l2norm() const;
1080 * @brief Normalize the Tensor elements
1081 * @retval Calculated Tensor
1083 Tensor &normalization(Tensor &output) const;
1086 * @brief Standardize the Tensor elements
1087 * @retval Calculated Tensor
1089 Tensor &standardization(Tensor &output) const;
1092 * @brief Normalize the Tensor elements in-place
1093 * @retval Calculated Tensor
1095 void normalization_i();
1098 * @brief Standardize the Tensor elements in-place
1099 * @retval Calculated Tensor
1101 void standardization_i();
1103 template <typename T = float> T *getAddress(unsigned int i) {
1104 size_t index = getIndex(batch(), channel(), height(), width());
1108 return &getData<T>()[i];
1112 * @brief i data index
1113 * @retval address of ith data
1115 template <typename T = float> const T *getAddress(unsigned int i) const {
1116 size_t index = getIndex(batch(), channel(), height(), width());
1121 return &getData<T>()[i];
1125 * @brief get address of n-d data
1127 template <typename T = float>
1128 T *getAddress(unsigned int b, unsigned int c, unsigned int h,
1130 return getAddress<T>(getIndex(b, c, h, w));
1134 * @brief get address of n-d data
1136 template <typename T = float>
1137 const T *getAddress(unsigned int b, unsigned int c, unsigned int h,
1138 unsigned int w) const {
1139 return getAddress<T>(getIndex(b, c, h, w));
1143 * @brief Apply instantly to the element
1145 * @param f function to apply
1146 * @return int ML_ERROR_NONE if successful
1148 int apply_i(std::function<float(float)> f) {
1149 Tensor result = *this;
1152 return ML_ERROR_NONE;
1156 * @brief Apply function element by element
1157 * @param[in] *function function pointer applied
1160 Tensor apply(std::function<float(float)> f) const {
1162 return apply(f, result);
1166 * @brief Apply function element by element
1167 * @param[in] *function function pointer applied
1168 * @param[out] output output tensor
1171 Tensor &apply(std::function<float(float)> f, Tensor &output) const {
1172 CREATE_IF_EMPTY_DIMS(output, dim, nullptr);
1174 if (dim != output.dim) {
1175 /// @todo add unittest
1176 throw std::invalid_argument(
1177 "[Tensor::apply] output dimension does not match");
1180 if (dim.getDataType() == Tdatatype::FP32) {
1181 if (contiguous && output.contiguous) {
1182 const float *data = (getData<float>());
1183 float *rdata = (output.getData<float>());
1185 std::transform(data, data + size(), rdata, f);
1186 } else if (strides[3] == 1 && output.strides[3] == 1) {
1187 /** @todo optimize this with combining these loops where stride is 1 */
1188 for (unsigned int b = 0; b < batch(); ++b) {
1189 for (unsigned int c = 0; c < channel(); ++c) {
1190 for (unsigned int h = 0; h < height(); ++h) {
1191 float *out_data = output.getAddress<float>(b, c, h, 0);
1192 const float *in_data = getAddress<float>(b, c, h, 0);
1193 std::transform(in_data, in_data + width(), out_data, f);
1198 for (unsigned int b = 0; b < batch(); ++b) {
1199 for (unsigned int c = 0; c < channel(); ++c) {
1200 for (unsigned int h = 0; h < height(); ++h) {
1201 for (unsigned int w = 0; w < width(); ++w) {
1202 output.setValue(b, c, h, w, f(getValue<float>(b, c, h, w)));
1208 } else if (dim.getDataType() == Tdatatype::FP16) {
1210 if (contiguous && output.contiguous) {
1211 const __fp16 *data = (getData<__fp16>());
1212 __fp16 *rdata = (output.getData<__fp16>());
1214 std::transform(data, data + size(), rdata, f);
1215 } else if (strides[3] == 1 && output.strides[3] == 1) {
1216 /** @todo optimize this with combining these loops where stride is 1 */
1217 for (unsigned int b = 0; b < batch(); ++b) {
1218 for (unsigned int c = 0; c < channel(); ++c) {
1219 for (unsigned int h = 0; h < height(); ++h) {
1220 __fp16 *out_data = (__fp16 *)output.getAddress(b, c, h, 0);
1221 const __fp16 *in_data = (__fp16 *)getAddress(b, c, h, 0);
1222 std::transform(in_data, in_data + width(), out_data, f);
1227 for (unsigned int b = 0; b < batch(); ++b) {
1228 for (unsigned int c = 0; c < channel(); ++c) {
1229 for (unsigned int h = 0; h < height(); ++h) {
1230 for (unsigned int w = 0; w < width(); ++w) {
1231 output.setValue(b, c, h, w,
1232 f((float)((__fp16)getValue(b, c, h, w))));
1239 throw std::invalid_argument("Error: enable-fp16 is not enabled");
/**
 * @brief Apply function to Tensor
 * @param[in] f function applied to a copy of this tensor
 * @retval Calculated Tensor
 */
Tensor apply(std::function<Tensor(Tensor)> f) const;

/**
 * @brief Apply function to Tensor
 * @param[in] f function applied to this tensor
 * @param[out] output output tensor
 * @retval reference to output
 */
Tensor &apply(std::function<Tensor &(Tensor, Tensor &)> f,
              Tensor &output) const;

/**
 * @brief Print element
 * @param[in] out out stream
 */
void print(std::ostream &out) const;

/**
 * @brief Print element
 * @param[in] out out stream
 * @param[in] opt print formatting option. opt=0 would pretty print the data,
 *            else it would print the raw data.
 */
void print_(std::ostream &out, uint opt = 0) const;
/**
 * @brief Get size of current tensor
 * @retval unsigned int size (number of elements) of the current tensor
 */
size_t size() const { return dim.getDataLen(); }

/**
 * @brief Get if the tensor is empty
 * @retval true if the tensor is empty
 */
bool empty() const { return size() == 0; }

/**
 * @brief Get size of the data in bytes
 * @retval size_t Size in bytes
 */
size_t bytes() const { return size() * dim.getDataTypeSize(); }
1296 * @brief Set the element value
1297 * @param[in] batch batch location
1298 * @param[in] c channel location
1299 * @param[in] h height location
1300 * @param[in] w width location
1301 * @param[in] value value to be stored
1303 void setValue(unsigned int batch, unsigned int c, unsigned int h,
1304 unsigned int w, float value) noexcept {
1305 if (getDataType() == Tdatatype::FP32) {
1306 getData<float>()[getIndex(batch, c, h, w)] = value;
1307 } else if (getDataType() == Tdatatype::FP16) {
1309 getData<__fp16>()[getIndex(batch, c, h, w)] = value;
1311 ml_loge("%s", "Error: enable-fp16 is not enabled");
1317 * @brief add the element value to the location
1318 * @param[in] batch batch location
1319 * @param[in] c channel location
1320 * @param[in] h height location
1321 * @param[in] w width location
1322 * @param[in] value value to be stored
1323 * @param[in] beta scalar to multiply output with and add
1325 void addValue(unsigned int batch, unsigned int c, unsigned int h,
1326 unsigned int w, float value, float beta) noexcept {
1327 auto const &idx = getIndex(batch, c, h, w);
1328 if (dim.getDataType() == Tdatatype::FP32) {
1329 getData<float>()[idx] *= beta;
1330 getData<float>()[idx] += value;
1331 } else if (dim.getDataType() == Tdatatype::FP16) {
1333 getData<__fp16>()[idx] *= beta;
1334 getData<__fp16>()[idx] += value;
1336 ml_loge("%s", "Error: enable-fp16 is not enabled");
1342 * @brief Set the element value
1343 * @param[in] offset offset from start location
1344 * @param[in] value value to be stored
1346 * @todo This is a temporary workout. Remove this once multiple datatypes
1349 void setValueInt(unsigned int offset, int value) noexcept {
1350 int *data_int = (int *)getData();
1351 data_int[offset] = value;
/**
 * @brief Fill the Tensor elements with value
 * @param[in] value value to be stored
 */
void setValue(float value);
1366 * @brief Set the Dist object
1368 * @tparam T distrubution engine
1369 * @param dist distribution engine
1371 template <typename T, typename Engine> void setDist(Engine dist) {
1372 NNTR_THROW_IF(!contiguous, std::invalid_argument)
1373 << getName() << " Tensor is not contiguous, cannot set distribution";
1375 T *data_ = getData<T>();
1376 unsigned int len = size();
1377 for (unsigned int i = 0; i < len; ++i) {
1378 data_[i] = (T)dist(rng);
/**
 * @brief Set the tensor with random normal distribution
 * @param[in] mean mean of the distribution
 * @param[in] std standard deviation of the distribution
 */
void setRandNormal(float mean = 0.0f, float std = 0.05f);

/**
 * @brief Set the tensor with random uniform distribution
 * @param[in] min minimum value for the distribution
 * @param[in] max maximum value for the distribution
 */
void setRandUniform(float min = -0.05f, float max = 0.05f);

/**
 * @brief Set the tensor with random bernoulli distribution
 * @param[in] probability probability value for the distribution
 */
void setRandBernoulli(float probability = 0.5f);
1403 * @brief Initialize the memory of the given tensor
1408 * @brief Initialize the memory of the given tensor
1409 * @param init Initiailizer to use for the initialization
1411 void initialize(Initializer init) {
/**
 * @brief set the memory format
 * @param fm format of Tensor
 * @note NOTE(review): only acts when the requested format differs from the
 *       current one; the conversion body is truncated in this view — confirm
 *       the data-relayout logic against the full header.
 */
void convertFormat(TensorDim::Format fm) {
  if (getFormat() != fm) {
/**
 * @brief Copy the Tensor
 * @param[in] from Tensor to be copied
 * @note copy can reshape the tensor to match the shape
 */
void copy(const Tensor &from);

/**
 * @brief Copy the Tensor's data
 * @param[in] from Tensor to be copied
 */
void copyData(const Tensor &from);

/**
 * @brief Copy the Tensor, honoring strides
 * @param[in] from Tensor to be copied
 */
void copy_with_stride(const Tensor &from);

/**
 * @brief Get slice of the tensor, sliced by batch
 * @param[in] offset offset in batch to start the slice
 * @param[in] size size of the slice
 * @retval slice of this tensor
 * @note This function provides a slice of this tensor, and does not create a
 *       copy of the underlying memory.
 */
Tensor getBatchSlice(size_t offset, unsigned int size) const;

/**
 * @brief Get new tensor which shares memory with current tensor but different
 *        shape
 * @param dim new dimension to be set for this tensor
 * @param offset offset to be used from the start of the data in elements
 * @note The new tensor will share the same data as the current tensor but
 *       can have different size.
 * @note New size added with offset must be less than the size of the original
 *       tensor.
 */
Tensor getSharedDataTensor(const TensorDim dim, size_t offset,
                           bool reset_stride = true,
                           const std::string &name_ = "") const;

/**
 * @brief split tensor along axis.
 * @param num_size num_size
 * @param axis axis to split along
 * @return Tensor splitted tensor
 */
std::vector<Tensor> split(unsigned num_size, int axis = 0);

/**
 * @brief split tensor along axis.
 * @param sizes sizes
 * @param axis axis to split along
 * @return Tensor splitted tensor
 * @note if the given array sizes is just a 1 unsigned int value, assumes that
 *       it divide tensor by given size evenly
 */
std::vector<Tensor> split(std::vector<size_t> sizes, int axis = 0);

/**
 * @brief concatenate tensors along axis
 * @param tensors tensors to be concatenated to the first tensor
 * @param axis axis to concatenate along
 * @return Tensor concatenated tensor
 */
static Tensor cat(const std::vector<Tensor> &tensors, int axis = 0);

/**
 * @brief make this tensor share memory with given tensor
 * @param src Source tensor whose memory is to be shared
 * @param offset offset to be used from the start of the data in bytes
 * @note This tensor will share the same data as the current tensor but
 *       can have different size.
 * @note This tensor's size added with offset must be less than the size of
 *       the source tensor.
 * @note The stride of the source tensor and this tensor must be same.
 */
void makeSharedDataTensor(const Tensor &src, size_t offset = 0);

/**
 * @brief Convient wrapper for inplace copy of @a this.
 * @retval Copied version of this
 */
Tensor clone() const;

/**
 * @brief Save the Tensor into file
 * @param[in] file output file stream
 */
void save(std::ostream &file);

/**
 * @brief Read the Tensor from file
 * @param[in] file input file stream
 */
void read(std::ifstream &file);

/**
 * @brief return argument index which value is max by batch
 * @retval unsigned int argument index
 */
std::vector<unsigned int> argmax() const;

/**
 * @brief return max of the absolute values of the tensor
 * @retval maximum absolute value
 */
float max_abs() const;
/**
 * @brief return a copy of the Tensor Dim
 * @retval TensorDim copy of this tensor's dimension
 */
TensorDim getDim() const { return TensorDim(dim); }

/**
 * @brief return Tensor Dim for a given axis
 * @retval dimension of the given axis
 */
size_t getTensorDim(unsigned int axis);

/**
 * @brief return Tensor Type
 */
TensorDim::TensorType getTensorType() const { return dim.getTensorType(); };

/**
 * @brief return Tensor batch size
 * @retval batch size
 */
size_t batch() const { return dim.batch(); }

/**
 * @brief return Tensor channel size
 * @retval channel size
 */
size_t channel() const { return dim.channel(); }

/**
 * @brief return Tensor height size
 * @retval height size
 */
size_t height() const { return dim.height(); }

/**
 * @brief return Tensor width size
 * @retval width size
 */
size_t width() const { return dim.width(); }

/**
 * @brief return Tensor Data Type Size
 * @retval data type size
 */
uint getDataTypeSize() const { return dim.getDataTypeSize(); }
1592 * @brief update batch size for this tensor
1594 * @note The batchsize of src_tensor need not be related with this
1595 * tensor's batch size
1597 * @note The memory for this tensor will re-allocated/re-assigned if the
1598 * updated batch size is different than the current batch size.
1600 * @note If this tensor is/was the src_tensor for some other, then
1601 * reduction in batch size can make the dependent tensors allocate fail due to
1602 * memory smaller. Caller must handle this in their own end.
1604 * @note If this tensor is re-allocated, then the memory might not be
1605 * immediately freed as the tensor already depending on this tensor also
1606 * share the same memory. So, the peak memory consumption in worst case can
1607 * reach the total memory requirements of a model with old batchsize and the
1608 * new batch size. It is recommended to first deallocate all the tensors,
1609 * updateBatch and then allocate again to avoid such issues.
1611 void updateBatch(unsigned int batch) {
1612 if (dim.batch() == batch) {
1617 throw std::invalid_argument(
1618 "Cannot update batch for an allocated tensor");
1623 * @brief return Data pointer of Tensor
1624 * @retval template T pointer (float pointer as default)
1626 template <typename T = float> T *getData() {
1631 return data->getAddr<T>() + offset;
1635 * @brief return Data pointer of Tensor
1636 * @retval template T pointer (float pointer as default)
1638 template <typename T = float> const T *getData() const {
1643 return data->getAddr<T>() + offset;
1647 * @brief return Data pointer of Tensor
1648 * @retval template T pointer (float pointer as default)
1650 template <typename T = float> T *getData(size_t idx) const {
1657 return data->getAddr<T>() + offset + index;
1660 void setDataType(Tdatatype d_type) { dim.setDataType(d_type); }
1662 void setTensorType(ml::train::TensorDim::TensorType t_type) {
1663 dim.setTensorType(t_type);
/**
 * @brief put data of Tensor
 * @note It is only effective when memory_swap is used
 * @note NOTE(review): body truncated in this view — presumably releases /
 *       invalidates the MemoryData handle; confirm against the full header.
 */
void putData() const {
/**
 * @brief return the raw memory handle of this Tensor
 * @retval shared_ptr to the underlying MemoryData buffer
 */
const std::shared_ptr<MemoryData> getMemoryData() const { return data; }

/**
 * @brief return offset (in elements) of this tensor within its buffer
 */
size_t getOffset() const { return offset; }
/**
 * @brief set Tensor Dim
 * @param[in] d TensorDim
 * @note Throws std::invalid_argument if size mismatch
 */
void reshape(const TensorDim &d);

/**
 * @brief fill tensor data with current value,
 *        if dimension is not exactly same, it is a hard error in this
 *        function so, only stride is overriden to @a this
 * @param from Tensor to fill the data from
 * @param allocate if unallocated, allocate with from.getDim()
 * @throws std::invalid_argument if dimension and stride does not match
 */
void fill(const Tensor &from, bool allocate = false);
1712 * @brief return current stride of tensor.
1713 * @retval int[MAXDIM] strides
1715 const std::array<size_t, TensorDim::MAXDIM> getStrides() const noexcept {
1719 * @brief Get linear index given the n-d index
1721 inline size_t getIndex(unsigned int b, unsigned int c, unsigned int h,
1722 unsigned int w) const noexcept {
1723 if (getFormat() == Tformat::NCHW) {
1724 return (b * strides[0] + c * strides[1] + h * strides[2] +
1727 return (b * strides[0] + h * strides[1] + w * strides[2] +
1733 * @brief Check if two given axes are contiguous
1735 bool checkContinuous(unsigned int n, unsigned int np1) const {
1736 std::vector<unsigned int> continuous_order_nhwc = {0, 3, 1, 2};
1737 bool continuous = false;
1738 if (getFormat() == Tformat::NHWC) {
1739 if (continuous_order_nhwc[np1] == continuous_order_nhwc[n] + 1)
/**
 * @brief Set name of the tensor
 * @param[in] name_ name to assign
 */
void setName(const std::string &name_) { name = name_; }

/**
 * @brief Get name of the tensor
 * @return name of the tensor
 */
const std::string &getName() const { return name; }
/**
 * @brief Set the memory buffer for the tensor
 * @param buf the memory buffer
 * @param off offset into the buffer
 * @param init intialize the buffer
 * @note NOTE(review): body truncated in this view — confirm the buffer
 *       wiring and optional initialization against the full header.
 */
void setData(const std::shared_ptr<MemoryData> buf, size_t off = 0,
             bool init = false) {
/**
 * @brief Get initializer for the tensor
 * @return initializer of the tensor
 */
Tensor::Initializer getInitializer() const { return initializer; }

/**
 * @brief Get format for the tensor
 * @return format of the tensor
 */
TensorDim::Format getFormat() const { return dim.getFormat(); }

/**
 * @brief Get data type for the tensor
 * @return data type of the tensor
 */
Tdatatype getDataType() const { return dim.getDataType(); }

/// tolerance shared by element-wise comparison helpers
static constexpr float epsilon = 1e-5;
std::array<size_t, TensorDim::MAXDIM> strides; /**< per-axis strides used by
                                                    getIndex() */

Tensor::Initializer initializer; /**< initializer applied to the memory */
std::string name;                /**< name of the tensor */

/**< handle to the underlying data buffer (MemoryData) */
std::shared_ptr<MemoryData> data;

/**
 * When using shared_data with tensor, this stores the ptr of the source
 * tensor which handles the full memory. If tensor data is already allocated,
 * this does not affect the tensor. If the tensor data is not allocated, and
 * src_ptr is valid, this tensor will use the memory allocated by the src_ptr
 */
std::shared_ptr<SrcSharedTensor> src_tensor;

struct BroadcastInfo; /**< forward declaration: loop info for broadcast ops */
/**
 * @brief Applies the given operator to the tensor with the passed argument
 * @param[in] m Tensor
 * @param[in] v_func vectorized function to apply
 * @param e broadcast info.
 * @param cur_axis current axis. pass default when calling outside.
 * @param offset offset for this. pass default when calling outside.
 * @param m_offset offset for m. pass default when calling outside.
 * @retval #ML_ERROR_NONE Successful
 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
 */
apply_broadcast_util(Tensor const &m,
                     std::function<void(const BroadcastInfo &e, const float *,
                                        const float *, float *)>
                     Tensor &output, const BroadcastInfo &e,
                     int cur_axis = -1, size_t offset = 0,
                     size_t m_offset = 0) const;

/**
 * @brief Applies the given operator to the tensor with the passed argument
 * @param[in] m Tensor
 * @param[in] v_func vectorized function to apply
 * @param[out] output output tensor
 * @retval #ML_ERROR_NONE Successful
 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
 */
void apply_broadcast(Tensor const &m,
                     std::function<void(const BroadcastInfo &e, const float *,
                                        const float *, float *)>
                     Tensor &output) const;

/**
 * @brief half-precision variant of apply_broadcast_util
 * @note NOTE(review): uses __fp16 — presumably compiled only under an
 *       fp16-enabled build; confirm the surrounding guard in the full header.
 */
void apply_broadcast_util(
  std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
  Tensor &output, const BroadcastInfo &e, int cur_axis = -1,
  size_t offset = 0, size_t m_offset = 0) const;

/**
 * @brief half-precision variant of apply_broadcast
 */
apply_broadcast(Tensor const &m,
                std::function<void(const BroadcastInfo &e, const __fp16 *,
                                   const __fp16 *, __fp16 *)>
                Tensor &output) const;

/**
 * @brief compute Loop info for broadcasting and vectorization
 * @param m target tensor to be calculated against.
 * @return BroadcastInfo Loopinfo needed to run external loop
 */
BroadcastInfo computeBroadcastInfo(const Tensor &m) const;
/**
 * @brief copy a buffer to @a this, the caller has to ensure that @a this is
 *        initialized otherwise undefined behavior
 * @param buf buffer to copy from
 */
void copy(const void *buf);

/**
 * @brief Update destination tensor to share memory with source tensor
 * @param src src tensor containing the memory
 * @param dest destination tensor which will share the memory
 * @param offset offset to be used from the start of the data in bytes
 * @note The new tensor will share the same data as the current tensor but
 *       can have different size.
 * @note New size added with offset must be less than the size of the
 *       original tensor.
 */
static void createSharedDataTensor(const Tensor &src, Tensor &dest,
/**
 * @brief Reallocate memory for this tensor
 * @note This will not necessary free the memory as tensors share memory
 * @note This can increase the peak memory consumption when callled on all
 *       the tensors of a model sequentially. It is advised to first
 *       deallocate all the tensors and then allocate, than reallocate
 *       tensors one by one.
 */

/**
 * @brief Merge the given two axis for tensor at second axis inplace
 * @param axis1 first axis to merge
 * @param axis2 second axis to merge
 */
void mergeAxis(unsigned int axis1, unsigned int axis2);

/**
 * @brief rotate 180 dgree
 * @param[in] in input Tensor
 * @retVal Tensor rotated tensor (180 degree)
 * @note NOTE(review): `in` is taken by value — a const reference would avoid
 *       a copy; confirm callers before changing the signature.
 */
Tensor rotate_180(Tensor in);
1930 }; // namespace nntrainer
1933 * @brief Overriding output stream
1935 std::ostream &operator<<(std::ostream &out, Tensor const &m);
1937 typedef std::shared_ptr<Tensor> sharedTensor;
1939 typedef std::shared_ptr<const Tensor> sharedConstTensor;
1941 typedef std::vector<sharedConstTensor> sharedConstTensors;
1943 typedef std::vector<sharedTensor> sharedTensors;
1945 } /* namespace nntrainer */
1947 #endif /* __cplusplus */
1948 #endif /* __TENSOR_H__ */