2 * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
16 * @date 04 December 2019
17 * @brief This is Tensor class for calculation
18 * @see https://github.com/nnstreamer/nntrainer
19 * @author Jijoong Moon <jijoong.moon@samsung.com>
20 * @bug No known bugs except for NYI items
22 * @todo deprecate new tensor allocation for out of place operations.
36 #include <blas_interface.h>
38 #include <memory_data.h>
39 #include <nntrainer_error.h>
40 #include <tensor_dim.h>
41 #include <util_func.h>
/**
 * @brief Functions marked EXCEPT_WHEN_DEBUG may throw in debug builds only;
 *        in release builds they are declared noexcept.
 */
#ifdef DEBUG
#define EXCEPT_WHEN_DEBUG
#else
#define EXCEPT_WHEN_DEBUG noexcept
#endif
49 #define MAKE_SHARED_TENSOR(...) std::make_shared<nntrainer::Tensor>(__VA_ARGS__)
/**
 * @brief Construct @a tensor in place from the remaining arguments, but only
 *        when @a tensor is currently empty; otherwise leave it untouched.
 */
#define CREATE_IF_EMPTY_DIMS(tensor, ...) \
  do {                                    \
    if (tensor.empty())                   \
      tensor = Tensor(__VA_ARGS__);       \
  } while (0)
59 using TensorDim = ml::train::TensorDim;
60 using Tformat = ml::train::TensorDim::Format;
61 using Tdatatype = ml::train::TensorDim::DataType;
64 class SrcSharedTensor;
67 * @class Tensor Class for Calculation
68 * @brief Tensor Class for Calculation
  /**
   * @brief     Enumeration of Weight Initialization Type
   * @todo      support initialization from file
   */
76 enum class Initializer {
77 ZEROS, /** Zero initialization */
78 ONES, /** One initialization */
79 LECUN_NORMAL, /** LeCun normal initialization */
80 LECUN_UNIFORM, /** uniform initialization */
81 XAVIER_NORMAL, /** Xavier normal initialization */
82 XAVIER_UNIFORM, /** Xavier uniform initialization */
83 HE_NORMAL, /** He normal initialization */
84 HE_UNIFORM, /** He uniform initialization */
85 NONE /** No initialization */
89 * @brief Basic Constructor of Tensor
91 Tensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
92 Tdatatype d_type = Tdatatype::FP32) :
93 dim(TensorDim(fm, d_type)),
94 strides(dim.computeStrides()),
96 initializer(Initializer::NONE),
103 * @brief Constructor of Tensor with dimension, possibly lazily
104 * @param d Tensor dim for this tensor
105 * @param alloc_now If the memory of the tensor must be allocated
106 * @param init Initializer for the tensor
107 * @param name Name of the tensor
109 Tensor(const TensorDim &d, bool alloc_now,
110 Initializer init = Initializer::NONE, std::string name = "");
113 * @brief Constructor of Tensor with dimension/buf
114 * @param d Tensor dim for this tensor
116 * @note Memory for this tensor is instantaneously allocated
118 Tensor(const TensorDim &d, const void *buf = nullptr);
121 * @brief Constructor of Tensor
122 * @param[in] d0 Batch of Tensor
123 * @param[in] d1 Channel
124 * @param[in] d2 Height
125 * @param[in] d3 Width
127 Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
128 Tdatatype d_type = Tdatatype::FP32) :
129 Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
132 * @brief Constructor of Tensor
133 * @param[in] d1 Channel
134 * @param[in] d2 Height
135 * @param[in] d3 Width
137 Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
138 Tdatatype d_type = Tdatatype::FP32) :
139 Tensor(1, d1, d2, d3, fm, d_type){};
142 * @brief Constructor of Tensor with batch size one and d1 size one
143 * @param[in] d2 Height (NCHW) or Width (NHWC)
144 * @param[in] d3 Width (NCHW) or Channel (NHWC)
146 Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
147 Tdatatype d_type = Tdatatype::FP32) :
148 Tensor(1, 1, d2, d3, fm, d_type){};
151 * @brief Constructor of Tensor with just Width or Channel
152 * @param[in] d3 Width (NCHW) or Channel (NHWC)
154 explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
155 Tdatatype d_type = Tdatatype::FP32) :
156 Tensor(1, 1, 1, d3, fm, d_type){};
159 * @brief Constructor of Tensor
160 * @param[in] d0 Batch of Tensor
161 * @param[in] d1 Channel (NCHW) or Height (NHWC)
162 * @param[in] d2 Height (NCHW) or Width (NHWC)
163 * @param[in] d3 Width (NCHW) or Channel (NHWC)
165 Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
166 ml::train::TensorDim::TensorType t_type) :
167 Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
170 * @brief Constructor of Tensor
171 * @param[in] d1 Channel
172 * @param[in] d2 Height
173 * @param[in] d3 Width
175 Tensor(size_t d1, size_t d2, size_t d3,
176 ml::train::TensorDim::TensorType t_type) :
177 Tensor(1, d1, d2, d3, t_type){};
180 * @brief Constructor of Tensor with batch size one and d1 size one
181 * @param[in] d2 Height (NCHW) or Width (NHWC)
182 * @param[in] d3 Width (NCHW) or Channel (NHWC)
184 Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
185 Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
186 (t_type.format == Tformat::NCHW) ? d2 : 1,
187 (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
189 * @brief Constructor of Tensor with just Width or Channel
190 * @param[in] d3 Width (NCHW) or Channel (NHWC)
192 explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
193 Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
194 (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
197 * @brief Constructor of Tensor
198 * @param[in] d data for the Tensor. It needs to set format properly.
201 Tensor(std::vector<std::vector<std::vector<std::vector<float>>>> const &d,
202 ml::train::TensorDim::TensorType t_type) {
203 if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
204 throw std::out_of_range(
205 "[Tensor] trying to initialize Tensor from empty vector");
207 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
208 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
209 // dim[1] == height, dim[2] == width, dim[3] == channel
210 dim.setTensorDim(0, d.size());
211 if (t_type.format == Tformat::NCHW) {
212 dim.setTensorDim(1, d[0].size());
213 dim.setTensorDim(2, d[0][0].size());
214 dim.setTensorDim(3, d[0][0][0].size());
216 dim.setTensorDim(2, d[0].size());
217 dim.setTensorDim(3, d[0][0].size());
218 dim.setTensorDim(1, d[0][0][0].size());
221 setTensorType(t_type);
223 strides = dim.computeStrides();
225 MemoryData *mem_data =
226 new MemoryData((void *)(new float[dim.getDataLen()]()));
227 data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
228 delete[] mem_data->getAddr<float>();
232 initializer = Initializer::NONE;
234 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
235 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
236 // dim[1] == height, dim[2] == width, dim[3] == channel
237 if (t_type.format == Tformat::NCHW) {
238 for (unsigned int i = 0; i < batch(); ++i)
239 for (unsigned int j = 0; j < channel(); ++j)
240 for (unsigned int k = 0; k < height(); ++k)
241 for (unsigned int l = 0; l < width(); ++l)
242 this->setValue(i, j, k, l, d[i][j][k][l]);
244 for (unsigned int i = 0; i < batch(); ++i)
245 for (unsigned int j = 0; j < height(); ++j)
246 for (unsigned int k = 0; k < width(); ++k)
247 for (unsigned int l = 0; l < channel(); ++l)
248 this->setValue(i, l, j, k, d[i][j][k][l]);
253 * @brief Constructor of Tensor
254 * @note This constructor copies vector again. needs refactoring
255 * @param[in] d data for the Tensor. It needs to set format properly.
257 Tensor(std::vector<std::vector<std::vector<float>>> const &d,
258 ml::train::TensorDim::TensorType t_type) :
259 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
262 * @brief Constructor of Tensor
263 * @note This constructor copies vector again. needs refactoring
264 * @param[in] d data for the Tensor with batch size one
266 Tensor(std::vector<std::vector<float>> const &d,
267 ml::train::TensorDim::TensorType t_type) :
268 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
270 Tensor(std::vector<std::vector<std::vector<std::vector<__fp16>>>> const &d,
271 ml::train::TensorDim::TensorType t_type) {
273 if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
274 throw std::out_of_range(
275 "[Tensor] trying to initialize Tensor from empty vector");
278 dim.setTensorDim(0, d.size());
279 if (t_type.format == Tformat::NCHW) {
280 dim.setTensorDim(1, d[0].size());
281 dim.setTensorDim(2, d[0][0].size());
282 dim.setTensorDim(3, d[0][0][0].size());
284 dim.setTensorDim(2, d[0].size());
285 dim.setTensorDim(3, d[0][0].size());
286 dim.setTensorDim(1, d[0][0][0].size());
289 setTensorType(t_type);
291 strides = dim.computeStrides();
293 MemoryData *mem_data =
294 new MemoryData((void *)(new __fp16[dim.getDataLen()]()));
295 data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
296 delete[] mem_data->getAddr<__fp16>();
300 initializer = Initializer::NONE;
302 setDataType(Tdatatype::FP16);
304 // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
305 // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
306 // dim[1] == height, dim[2] == width, dim[3] == channel
307 if (t_type.format == Tformat::NCHW) {
308 for (unsigned int i = 0; i < batch(); ++i)
309 for (unsigned int j = 0; j < channel(); ++j)
310 for (unsigned int k = 0; k < height(); ++k)
311 for (unsigned int l = 0; l < width(); ++l)
312 this->setValue(i, j, k, l, d[i][j][k][l]);
314 for (unsigned int i = 0; i < batch(); ++i)
315 for (unsigned int j = 0; j < height(); ++j)
316 for (unsigned int k = 0; k < width(); ++k)
317 for (unsigned int l = 0; l < channel(); ++l)
318 this->setValue(i, l, j, k, d[i][j][k][l]);
323 * @brief Constructor of Tensor
324 * @note This constructor copies vector again. needs refactoring
325 * @param[in] d data for the Tensor
327 Tensor(std::vector<std::vector<std::vector<__fp16>>> const &d,
328 ml::train::TensorDim::TensorType t_type) :
329 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
332 * @brief Constructor of Tensor
333 * @note This constructor copies vector again. needs refactoring
334 * @param[in] d data for the Tensor with batch size one
336 Tensor(std::vector<std::vector<__fp16>> const &d,
337 ml::train::TensorDim::TensorType t_type) :
338 Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
341 * @brief Copy constructor of Tensor.
342 * @param[in] Tensor &
344 Tensor(const Tensor &rhs) = default;
347 * @brief Move constructor of Tensor.
348 * @param[in] Tensor &&
350 Tensor(Tensor &&rhs) noexcept = default;
353 * @brief Copy assignment operator.
354 * @param[in] rhs Tensor to be copied.
356 Tensor &operator=(const Tensor &rhs) = default;
359 * @brief Move assignment operator.
360 * @parma[in] rhs Tensor to be moved.
362 Tensor &operator=(Tensor &&rhs) noexcept = default;
365 * @brief Construct a new Tensor object from a buffer
366 * This will not copy buffer to a new tensor but directly uses it
369 * @param bytes buffer size in bytes
370 * @param d tensor dim
371 * @param offset offset to be used from current
372 * @return Tensor object
373 * @throws std::invalid_argument if buf is null
375 template <typename T = float>
376 static Tensor Map(T *buf, unsigned int bytes, const TensorDim &d,
378 if (d.getDataLen() == 0 || buf == nullptr) {
379 throw std::invalid_argument(
380 "[Tensor::Map] empty tensor dim is not allowed");
383 if (d.getDataLen() * sizeof(T) + offset > bytes) {
384 throw std::invalid_argument(
385 "Creating shared tensor of size bigger than tensor memory.");
390 tmp.strides = d.computeStrides();
391 /// Tensor does not own the memory
392 tmp.data = std::shared_ptr<MemoryData>(new MemoryData((void *)buf),
393 std::default_delete<MemoryData>());
399 friend void swap(Tensor &lhs, Tensor &rhs) noexcept {
400 std::swap(lhs.dim, rhs.dim);
401 std::swap(lhs.strides, rhs.strides);
402 std::swap(lhs.contiguous, rhs.contiguous);
403 std::swap(lhs.initializer, rhs.initializer);
404 std::swap(lhs.data, rhs.data);
405 std::swap(lhs.name, rhs.name);
409 * @brief Comparison operator overload
410 * @param[in] rhs Tensor to be compared with
412 bool operator==(const Tensor &rhs) const;
415 * @brief Comparison operator overload
416 * @param[in] rhs Tensor to be compared with
418 bool operator!=(const Tensor &rhs) const { return !(*this == rhs); }
421 * @brief Allocate memory for this tensor
426 * @brief Deallocate memory for this tensor
427 * @note This will not necessary free the memory as tensors share memory
435 * @brief Check if the tensor has memory allocated/assigned/associated
437 bool isAllocated() const { return data != nullptr; }
440 * @brief return value at specific location
441 * @param[in] batch batch location
442 * @param[in] c channel location
443 * @param[in] h height location
444 * @param[in] w width location
446 template <typename T = float>
447 const T &getValue(unsigned int batch, unsigned int c, unsigned int h,
448 unsigned int w) const noexcept {
449 return getValue<T>(getIndex(batch, c, h, w));
452 template <typename T = float>
453 T &getValue(unsigned int batch, unsigned int c, unsigned int h,
454 unsigned int w) noexcept {
455 return getValue<T>(getIndex(batch, c, h, w));
459 * @brief return value at specific location
460 * @param[in] idx location
462 template <typename T = float>
463 const T &getValue(unsigned int idx) const noexcept {
464 return getData<T>()[idx];
468 * @brief return value at specific location
469 * @param[in] idx location
471 template <typename T = float> T &getValue(unsigned int idx) noexcept {
472 return getData<T>()[idx];
476 * @brief Get the Value thinking that it is padded
477 * for example, for the tensor (virtually padded) below,
478 * getValue(0, 0, 2, 2, 1, 1, .0f) will return 5
479 * padding available for height and width axis for now
485 * @param b batch index
486 * @param c channel index
487 * @param h height index
488 * @param w width index
489 * @param ph padding height
490 * @param pw padding width
491 * @return float value
493 template <typename T = float>
494 const T getValuePaddedVirtual(unsigned int b, unsigned int c, unsigned int h,
495 unsigned int w, unsigned int ph,
497 T pad_value = 0) const EXCEPT_WHEN_DEBUG {
499 unsigned int padded_h = 2 * ph + h;
500 unsigned int padded_w = 2 * pw + w;
501 if (h > padded_h && w > padded_w) {
502 throw std::out_of_range(
503 "[Tensor::getValuePadded] trying to access out of range");
507 if (ph <= h && h < ph + height() && pw <= w && w < pw + width()) {
508 return getValue<T>(b, c, h - ph, w - pw);
515 * @brief Multiply value element by element immediately
516 * @param[in] value multiplier
517 * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right
518 * @retval #ML_ERROR_NONE Successful
520 int multiply_i(float const &value);
523 * @brief Multiply value element by element
524 * @param[in] value multiplier
525 * @retval Calculated Tensor
527 Tensor multiply(float const &value) const;
530 * @brief multiply value element by element
531 * @param[in] value multiplier
532 * @param[out] out out tensor to store the result
533 * @retval Calculated Tensor
535 Tensor &multiply(float const &value, Tensor &out) const;
538 * @brief Multiply Tensor Elementwise
539 * @param[in] m Tensor to be multiplied
540 * @param[in] beta scalar to multiply output with and add
541 * @retval #ML_ERROR_NONE successful
543 int multiply_i(Tensor const &m, const float beta = 0.0);
546 * @brief Multiply Tensor Element by Element ( Not the MxM )
547 * @param[in] m Tensor to be multiplied
548 * @param[in] beta scalar to multiply output with and add
549 * @retval Calculated Tensor
551 Tensor multiply(Tensor const &m, const float beta = 0.0) const;
554 * @brief Multiply Tensor Element by Element ( Not the MxM )
555 * @param[in] m Tensor to be multiplied
556 * @param[out] output Tensor to store the result
557 * @param[in] beta scalar to multiply output with and add
558 * @retval Calculated Tensor
560 Tensor &multiply(Tensor const &m, Tensor &output,
561 const float beta = 0.0) const;
564 * @brief Multiply Tensor Elementwise
565 * @param[in] m Tensor to be multiplied
566 * @param[in] beta scalar to multiply output with and add
567 * @retval #ML_ERROR_NONE successful
569 * @note support different strided inputs and output
570 * @note does not support broadcasting
572 * @todo merge this to multiply_i
574 int multiply_i_strided(Tensor const &m, const float beta = 0.0);
577 * @brief Multiply Tensor Element by Element ( Not the MxM )
578 * @param[in] m Tensor to be multiplied
579 * @param[in] beta scalar to multiply output with and add
580 * @retval Calculated Tensor
582 * @note support different strided inputs and output
583 * @note does not support broadcasting
585 * @todo merge this to multiply
587 Tensor multiply_strided(Tensor const &m, const float beta = 0.0) const;
590 * @brief Multiply Tensor Element by Element ( Not the MxM )
591 * @param[in] m Tensor to be multiplied
592 * @param[out] output Tensor to store the result
593 * @param[in] beta scalar to multiply output with and add
594 * @retval Calculated Tensor
596 * @note support different strided inputs and output
597 * @note does not support broadcasting
599 * @todo merge this to multiply
601 Tensor &multiply_strided(Tensor const &m, Tensor &output,
602 const float beta = 0.0) const;
605 * @brief Add Tensor Elementwise
606 * @param[in] m Tensor to be added
607 * @param[in] beta scalar to add output with and add
608 * @retval #ML_ERROR_NONE successful
610 * @note support different strided inputs and output
611 * @note does not support broadcasting
613 * @todo merge this to add_i
615 int add_i_strided(Tensor const &m, const float beta = 0.0);
618 * @brief Add Tensor Element by Element
619 * @param[in] m Tensor to be added
620 * @param[in] beta Value to be scale the added tensor
621 * @retval Calculated Tensor
623 * @note support different strided inputs and output
624 * @note does not support broadcasting
626 * @todo merge this to add
628 Tensor add_strided(Tensor const &m, const float beta = 0.0) const;
631 * @brief Add Tensor Element by Element
632 * @param[in] m Tensor to be added
633 * @param[out] output Tensor to store the result
634 * @param[in] beta Value to be scale the added tensor
635 * @retval Calculated Tensor
637 * @note support different strided inputs and output
638 * @note does not support broadcasting
640 * @todo merge this to add
642 Tensor &add_strided(Tensor const &m, Tensor &output,
643 const float beta = 0.0) const;
646 * @brief Divide value element by element immediately
647 * @param[in] value divisor
648 * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right
649 * @retval #ML_ERROR_NONE Successful
651 int divide_i(float const &value);
654 * @brief Divide value element by element
655 * @param[in] value Divisor
656 * @retval Calculated Tensor
658 Tensor divide(float const &value) const;
661 * @brief Divide value element by element
662 * @param[in] value Divisor
663 * @param[out] out out parameter to store the result
664 * @retval Calculated Tensor
666 Tensor ÷(float const &value, Tensor &out) const;
669 * @brief divide Tensor Elementwise
670 * @param[in] m Tensor to be multiplied
671 * @retval #ML_ERROR_NONE successful
673 int divide_i(Tensor const &m);
676 * @brief Divide Tensor Element by Element
677 * @param[in] m Divisor Tensor
678 * @retval Calculated Tensor
680 Tensor divide(Tensor const &m) const;
683 * @brief divide Tensor Elementwise
684 * @param[in] m Tensor to be multiplied
685 * @param[out] output Tensor to store the result
686 * @retval Calculated Tensor
688 Tensor ÷(Tensor const &m, Tensor &output) const;
691 * @brief Add Tensor Element immediately to target tensor without mem copy
692 * @param[in] value value to be added
693 * @retval #ML_ERROR_NONE Successful
694 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
696 int add_i(float const &value);
699 * @brief Add value Element by Element
700 * @param[in] value value to be added
701 * @retval Calculated Tensor
703 Tensor add(float const &value) const;
706 * @brief Add Tensor Element by Element
707 * @param[in] value value to be added
708 * @param[out] out Tensor to save output without allocating new memory
709 * @retval Calculated Tensor
711 Tensor &add(float const &value, Tensor &out) const;
714 * @brief Add Tensor Element by Element without mem copy
715 * @param[in] m Tensor to be added
   * @param[in] alpha scaling factor applied when adding — presumably scales
   *            @a m before accumulation; confirm against the implementation
717 * @retval #ML_ERROR_NONE Successful
718 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
720 int add_i(Tensor const &m, float const alpha = 1);
723 * @brief Add Tensor Element by Element
724 * @param[in] m Tensor to be added
725 * @retval Calculated Tensor
727 Tensor add(Tensor const &m, float const alpha = 1) const;
730 * @brief Add Tensor Element by Element
731 * @param[in] m Tensor to be added
   * @param[out] out Tensor to store the result
733 * @retval Calculated Tensor
735 Tensor &add(Tensor const &m, Tensor &out, float const alpha = 1) const;
738 * @brief memcpyless version of subtract
739 * @param[in] value value to subtract
740 * @retval #ML_ERROR_NONE Successful
741 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
743 int subtract_i(float const &value);
746 * @brief subtract value Element by Element
747 * @param[in] value value to be subtracted
748 * @retval Calculated Tensor
750 Tensor subtract(float const &value) const;
753 * @brief Subtract Tensor Element by Element
754 * @param[in] value value to be added
755 * @param[out] out Tensor to save output without allocating new memory
756 * @retval Calculated Tensor
758 Tensor &subtract(float const &value, Tensor &out) const;
761 * @brief memcpyless version of subtract
762 * @param[in] m Tensor to be subtracted
763 * @retval #ML_ERROR_NONE Successful
764 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
766 int subtract_i(Tensor const &m);
769 * @brief Substract Tensor Element by Element
770 * @param[in] m Tensor to be subtracted
771 * @retval Calculated Tensor
773 Tensor subtract(Tensor const &m) const;
776 * @brief Subtract Tensor Element by Element
777 * @param[in] m Tensor to be added
   * @param[out] out Tensor to store the result
779 * @retval Calculated Tensor
781 Tensor &subtract(Tensor const &m, Tensor &out) const;
784 * @brief Tensor power elementwise
786 * @param exponent exponent
787 * @return int ML_ERROR_NONE if successful
789 int pow_i(float exponent);
792 * @brief Tensor power Element by Element
793 * @param[in] exponent exponent
794 * @retval Calculated Tensor
796 Tensor pow(float exponent) const;
799 * @brief Tensor power Element by Element
800 * @param[in] exponent exponent
801 * @param[out] out out to store the result
802 * @retval Calculated Tensor
804 Tensor &pow(float exponent, Tensor &out) const;
807 * @brief gaussian error function
808 * @return int ML_ERROR_NONE if successful
813 * @brief gaussian error function
814 * @retval Calculated Tensor
819 * @brief gaussian error function
820 * @param[out] out out to store the result
821 * @retval Calculated Tensor
823 Tensor &erf(Tensor &out) const;
825 unsigned int sizeofData() { return dim.getDataTypeSize(); }
828 * @brief Dot Product of Tensor ( equal MxM )
829 * @details This applies dot of the last dimension of this and second-last
830 * dimension of passed tensor m.
831 * @param[in] m Tensor
832 * @param[in] trans Transpose
833 * @param[in] trans_m Transpose m
834 * @retval Calculated Tensor
836 Tensor dot(Tensor const &m, bool trans = false, bool trans_m = false) const;
839 * @brief Dot Product of Tensor ( equal MxM )
840 * @details This applies dot of the last dimension of this and second-last
841 * dimension of passed tensor m.
842 * @param[in] m Tensor
843 * @param[in] output output Tensor
844 * @param[in] trans Transpose
845 * @param[in] trans_m Transpose m
846 * @param[in] beta beta
847 * @retval Calculated Tensor
849 Tensor &dot(Tensor const &m, Tensor &output, bool trans = false,
850 bool trans_m = false, float beta = 0.0f) const;
853 * @brief compute the derivative of this in the current tensor
854 * @param m same as given to the dot()
855 * @param output_deriv the derivative of the output
856 * @param[in] trans same as given to the dot()
857 * @param[in] trans_m same as given to the dot()
858 * @param[in] beta same as given to the dot()
859 * @note This will compute the derivative in-place and will overwrite existing
862 Tensor &dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv,
863 bool trans = false, bool trans_m = false,
867 * @brief compute the derivative wrt m in the m tensor
868 * @param m_deriv tensor where derivative wrt m will be stored
869 * @param output_deriv the derivative of the output
870 * @param[in] trans same as given to the dot()
871 * @param[in] trans_m same as given to the dot()
872 * @param[in] beta same as given to the dot()
873 * @note The caller tensor must be the same tensor as the one which called the
876 Tensor &dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv,
877 bool trans = false, bool trans_m = false,
878 float beta = 0.0f) const;
881 * @copydoc Tensor::dot(Tensor const &m, Tensor &output, bool trans,
882 bool trans_m, float beta) const
883 * @details performs dot operation over a batch of inputs
885 Tensor &dotBatched(Tensor const &m, Tensor &result, bool trans = false,
886 bool trans_m = false, float beta = 0.0f) const;
889 * @copydoc Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const
890 &output_deriv, bool trans, bool trans_m, float beta)
892 Tensor &dot_batched_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv,
893 bool trans = false, bool trans_m = false,
897 * @brief Tensor::dot_deriv_wrt_2(Tensor const &m_deriv, Tensor const
898 &output_deriv, bool trans, bool trans_m, float beta) const
900 Tensor &dot_batched_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv,
901 bool trans = false, bool trans_m = false,
902 float beta = 0.0f) const;
905 * @brief Transpose Tensor
907 * @param direction to transpose ex) 0:2:1
910 Tensor transpose(const std::string &direction) const;
913 * @brief Transpose Tensor
914 * @param direction to transpose ex) 0:2:1
915 * @param[out] Tensor to save to, dimension is always reshaped.
916 * @retval Tensor& reference to the out
918 Tensor &transpose(const std::string &direction, Tensor &out) const;
921 * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate)
922 * @param dropout drop out rate
923 * @retval Tensor& reference of drop out mask
925 Tensor dropout_mask(float dropout) const;
928 * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace
929 * @param dropout drop out rate
931 void dropout_mask(float dropout);
934 * @brief Calculate filter mask
935 * @param mask_len length of each mask along the last axis
936 * @param invert invert the mask
938 void filter_mask(const Tensor &mask_len, bool reverse = false);
941 * @brief Calculate 2 Zone Out Mask
942 * @details Calculate zone out mask according to the bernoulli distribution.
943 * Zone out mask with rate @a zoneout for inplace and the other zone out mask
944 * with rate @a (1-zoneout).
945 * @param zoneout zone out rate
946 * @retval Tensor zone out mask for opposite tensor
948 Tensor zoneout_mask(float zoneout);
951 * @brief Calculate 2 Zone Out Mask
952 * @details Calculate zone out mask according to the bernoulli distribution.
953 * Zone out mask with rate @a zoneout for inplace and the other zone out mask
954 * with rate @a (1-zoneout).
955 * @param opposite opposite zone out mask
956 * @param zoneout zone out rate
958 void zoneout_mask(Tensor &opposite, float zoneout);
961 * @brief sum all the Tensor elements according to the batch
962 * @retval Calculated Tensor(batch, 1, 1, 1)
964 Tensor sum_by_batch() const;
967 * @brief sum all the Tensor elements according to the axis
968 * 0 : batch direction
969 * 1 : channel direction
970 * 2 : height direction
971 * 3 : width direction
972 * @param[in] axis Axis to calculate sum along
973 * @param[in] alpha Scale the sum by this value
974 * @retval Calculated Tensor
976 Tensor sum(unsigned int axis, float alpha = 1.0) const;
979 * @brief sum all the Tensor elements according to the axis
980 * 0 : batch direction
981 * 1 : channel direction
982 * 2 : height direction
983 * 3 : width direction
984 * @param[in] axis Axis to calculate sum along
985 * @param[out] output output tensor
986 * @param[in] alpha Scale the sum by this value
987 * @retval Calculated Tensor
989 Tensor &sum(unsigned int axis, Tensor &output, float alpha = 1.0,
990 float beta = 0.0) const;
993 * @brief sum all the Tensor by multiple axes
995 * @param axes axes to sum along
996 * @param alpha Scale the sum by this value
999 Tensor sum(const std::vector<unsigned int> &axes, float alpha = 1.0) const;
1002 * @brief sum all the Tensor by multiple axes
1004 * @param axes axes to sum along
1005 * @param[out] output output tensor
1006 * @param alpha Scale the sum by this value
1009 Tensor &sum(const std::vector<unsigned int> &axes, Tensor &output,
1010 float alpha = 1.0) const;
1013 * @brief Averaging the Tensor elements according to the axis
1014 * 0 : batch direction
1015 * 1 : channel direction
1016 * 2 : height direction
1017 * 3 : width direction
1018 * @retval Calculated Tensor
1020 Tensor average(unsigned int axis) const;
1022 * @brief Averaging the Tensor elements according to the axis
1024 * @retval Calculated Tensor
1026 Tensor &average(unsigned int axis, Tensor &output) const;
1029 * @brief average all the Tensor by multiple axes
1031 * @param axes axes to sum along
1034 Tensor average(const std::vector<unsigned int> &axes) const;
1037 * @brief average all the Tensor by multiple axes
1039 * @param axes axes to sum along
1040 * @param output output tensor
1043 Tensor &average(const std::vector<unsigned int> &axes, Tensor &output) const;
1046 * @brief Averaging the Tensor elements by all axis
1047 * @retval Calculated Tensor
1049 Tensor average() const;
1052 * @brief Averaging the Tensor elements by all axis
1053 * @retval Calculated Tensor
1055 Tensor &average(Tensor &output) const;
1058 * @brief Anchor a starting point to defer following evaluation
1059 * @retval LazyTensor class that can be used with run();
1061 LazyTensor chain() const;
1064 * @brief Softmax the Tensor elements
1065 * @retval Calculated Tensor
1067 Tensor softmax() const;
1070 * @brief l2norm the Tensor elements
1071 * @retval Calculated l2norm
1073 float l2norm() const;
1076 * @brief Normalize the Tensor elements
1077 * @retval Calculated Tensor
1079 Tensor &normalization(Tensor &output) const;
1082 * @brief Standardize the Tensor elements
1083 * @retval Calculated Tensor
1085 Tensor &standardization(Tensor &output) const;
1088 * @brief Normalize the Tensor elements in-place
1089 * @retval Calculated Tensor
1091 void normalization_i();
1094 * @brief Standardize the Tensor elements in-place
1095 * @retval Calculated Tensor
1097 void standardization_i();
1099 template <typename T = float> T *getAddress(unsigned int i) {
1100 size_t index = getIndex(batch(), channel(), height(), width());
1104 return &getData<T>()[i];
1108 * @brief i data index
1109 * @retval address of ith data
1111 template <typename T = float> const T *getAddress(unsigned int i) const {
1112 size_t index = getIndex(batch(), channel(), height(), width());
1117 return &getData<T>()[i];
1121 * @brief get address of n-d data
1123 template <typename T = float>
1124 T *getAddress(unsigned int b, unsigned int c, unsigned int h,
1126 return getAddress<T>(getIndex(b, c, h, w));
1130 * @brief get address of n-d data
1132 template <typename T = float>
1133 const T *getAddress(unsigned int b, unsigned int c, unsigned int h,
1134 unsigned int w) const {
1135 return getAddress<T>(getIndex(b, c, h, w));
1139 * @brief Apply instantly to the element
1141 * @param f function to apply
1142 * @return int ML_ERROR_NONE if successful
1144 int apply_i(std::function<float(float)> f) {
// result is copy-constructed from *this; presumably the copy shares the
// underlying buffer so that applying f into it mutates *this in place —
// TODO confirm Tensor's copy semantics against the full source.
1145 Tensor result = *this;
1148 return ML_ERROR_NONE;
1152 * @brief Apply function element by element
1153 * @param[in] *function function pointer applied
* @return newly created output tensor holding f applied to every element
1156 Tensor apply(std::function<float(float)> f) const {
// Delegates to the two-argument overload with a fresh output tensor.
1158 return apply(f, result);
1162 * @brief Apply function element by element
1163 * @param[in] *function function pointer applied
1164 * @param[out] output output tensor
* @return reference to @a output
* @throws std::invalid_argument when output's dimension does not match
1167 Tensor &apply(std::function<float(float)> f, Tensor &output) const {
// Lazily allocate output with this tensor's dimension when it is empty.
1168 CREATE_IF_EMPTY_DIMS(output, dim, nullptr);
1170 if (dim != output.dim) {
1171 /// @todo add unittest
1172 throw std::invalid_argument(
1173 "[Tensor::apply] output dimension does not match");
// Dispatch on element data type, then pick the fastest applicable path.
1176 if (dim.getDataType() == Tdatatype::FP32) {
// Fast path: both buffers contiguous -> single linear transform.
1177 if (contiguous && output.contiguous) {
1178 const float *data = (getData<float>());
1179 float *rdata = (output.getData<float>());
1181 std::transform(data, data + size(), rdata, f);
// Row path: innermost stride is 1 on both sides -> transform row by row.
1182 } else if (strides[3] == 1 && output.strides[3] == 1) {
1183 /** @todo optimize this with combining these loops where stride is 1 */
1184 for (unsigned int b = 0; b < batch(); ++b) {
1185 for (unsigned int c = 0; c < channel(); ++c) {
1186 for (unsigned int h = 0; h < height(); ++h) {
1187 float *out_data = output.getAddress<float>(b, c, h, 0);
1188 const float *in_data = getAddress<float>(b, c, h, 0);
1189 std::transform(in_data, in_data + width(), out_data, f);
// Fallback: fully strided element-by-element copy through set/getValue.
1194 for (unsigned int b = 0; b < batch(); ++b) {
1195 for (unsigned int c = 0; c < channel(); ++c) {
1196 for (unsigned int h = 0; h < height(); ++h) {
1197 for (unsigned int w = 0; w < width(); ++w) {
1198 output.setValue(b, c, h, w, f(getValue<float>(b, c, h, w)));
// Half-precision variant: same three paths, values round-trip through
// __fp16 (f itself still operates on float).
1204 } else if (dim.getDataType() == Tdatatype::FP16) {
1205 if (contiguous && output.contiguous) {
1206 const __fp16 *data = (getData<__fp16>());
1207 __fp16 *rdata = (output.getData<__fp16>());
1209 std::transform(data, data + size(), rdata, f);
1210 } else if (strides[3] == 1 && output.strides[3] == 1) {
1211 /** @todo optimize this with combining these loops where stride is 1 */
1212 for (unsigned int b = 0; b < batch(); ++b) {
1213 for (unsigned int c = 0; c < channel(); ++c) {
1214 for (unsigned int h = 0; h < height(); ++h) {
1215 __fp16 *out_data = (__fp16 *)output.getAddress(b, c, h, 0);
1216 const __fp16 *in_data = (__fp16 *)getAddress(b, c, h, 0);
1217 std::transform(in_data, in_data + width(), out_data, f);
1222 for (unsigned int b = 0; b < batch(); ++b) {
1223 for (unsigned int c = 0; c < channel(); ++c) {
1224 for (unsigned int h = 0; h < height(); ++h) {
1225 for (unsigned int w = 0; w < width(); ++w) {
// Explicit cast chain: stored __fp16 -> float for f, result narrowed back
// by setValue.
1226 output.setValue(b, c, h, w,
1227 f((float)((__fp16)getValue(b, c, h, w))));
1239 * @brief Apply function to Tensor
1240 * @param[in] *function function pointer applied
1243 Tensor apply(std::function<Tensor(Tensor)> f) const;
1246 * @brief Apply function to Tensor
1247 * @param[in] *function function pointer applied
1248 * @param[out] output output tensor
1251 Tensor &apply(std::function<Tensor &(Tensor, Tensor &)> f,
1252 Tensor &output) const;
1255 * @brief Print element
1256 * @param[in] out out stream
1259 void print(std::ostream &out) const;
1262 * @brief Print element
1263 * @param[in] out out stream
1264 * @param[in] opt print formatting option. opt=0 would pretty print the data,
1265 * else it would print the raw data.
1268 void print_(std::ostream &out, uint opt = 0) const;
1271 * @brief Get size of current tensor
1272 * @retval unsigned int size of the current tensor
* @note this is the element count, not the size in bytes (see bytes()).
1274 size_t size() const { return dim.getDataLen(); }
1277 * @brief Get if the tensor is empty
1278 * @retval true if the tensor is empty
1280 bool empty() const { return size() == 0; }
1283 * @brief Get size of the data in bytes
1284 * @retval size_t Size in bytes
* @note element count multiplied by the per-element data type size.
1286 size_t bytes() const { return size() * dim.getDataTypeSize(); }
1289 * @brief Set the element value
1290 * @param[in] batch batch location
1291 * @param[in] c channel location
1292 * @param[in] h height location
1293 * @param[in] w width location
1294 * @param[in] value value to be stored
* @note for FP16 tensors the float value is narrowed to __fp16 on store.
1296 void setValue(unsigned int batch, unsigned int c, unsigned int h,
1297 unsigned int w, float value) noexcept {
1298 if (getDataType() == Tdatatype::FP32) {
1299 getData<float>()[getIndex(batch, c, h, w)] = value;
1300 } else if (getDataType() == Tdatatype::FP16) {
// implicit float -> __fp16 narrowing conversion here
1301 getData<__fp16>()[getIndex(batch, c, h, w)] = value;
1306 * @brief add the element value to the location
1307 * @param[in] batch batch location
1308 * @param[in] c channel location
1309 * @param[in] h height location
1310 * @param[in] w width location
1311 * @param[in] value value to be stored
1312 * @param[in] beta scalar to multiply output with and add
* @note computes element = element * beta + value (scale-then-accumulate).
1314 void addValue(unsigned int batch, unsigned int c, unsigned int h,
1315 unsigned int w, float value, float beta) noexcept {
1316 auto const &idx = getIndex(batch, c, h, w);
1317 if (dim.getDataType() == Tdatatype::FP32) {
1318 getData<float>()[idx] *= beta;
1319 getData<float>()[idx] += value;
1320 } else if (dim.getDataType() == Tdatatype::FP16) {
// same scale-then-add, carried out in half precision
1321 getData<__fp16>()[idx] *= beta;
1322 getData<__fp16>()[idx] += value;
1327 * @brief Set the element value
1328 * @param[in] offset offset from start location
1329 * @param[in] value value to be stored
* @todo This is a temporary workaround. Remove this once multiple datatypes
1334 void setValueInt(unsigned int offset, int value) noexcept {
// Reinterprets the underlying buffer as int (C-style type punning);
// caller is responsible for offset being within bounds.
1335 int *data_int = (int *)getData();
1336 data_int[offset] = value;
1340 * @brief Fill the Tensor elements with value
1341 * @param[in] value value to be stored
1343 void setValue(float value);
1346 * @brief Fill the Tensor elements with zero
1351 * @brief Set the Dist object
* @brief Fill every element with samples drawn from the given distribution.
* @tparam T element data type the drawn samples are cast to
* @param dist distribution engine, invoked with the member RNG `rng`
1356 template <typename T, typename Engine> void setDist(Engine dist) {
// Linear fill below assumes a contiguous buffer; reject otherwise.
1357 NNTR_THROW_IF(!contiguous, std::invalid_argument)
1358 << getName() << " Tensor is not contiguous, cannot set distribution";
1360 T *data_ = getData<T>();
1361 unsigned int len = size();
1362 for (unsigned int i = 0; i < len; ++i) {
1363 data_[i] = (T)dist(rng);
1368 * @brief Set the tensor with random normal distribution
1369 * @param[in] mean mean of the distribution
1370 * @param[in] std standard deviation of the distribution
1372 void setRandNormal(float mean = 0.0f, float std = 0.05f);
1375 * @brief Set the tensor with random uniform distribution
1376 * @param[in] min minimum value for the distribution
1377 * @param[in] max maximum value for the distribution
1379 void setRandUniform(float min = -0.05f, float max = 0.05f);
1382 * @brief Set the tensor with random bernoulli distribution
1383 * @param[in] probability probability value for the distribution
1385 void setRandBernoulli(float probability = 0.5f);
1388 * @brief Initialize the memory of the given tensor
1393 * @brief Initialize the memory of the given tensor
1394 * @param init Initializer to use for the initialization
1396 void initialize(Initializer init) {
// body elided in this view — presumably stores init and re-runs the
// default initialize(); TODO confirm against the full source
1402 * @brief set the memory format
1403 * @param fm format of Tensor
1405 void convertFormat(TensorDim::Format fm) {
// Only converts when the requested format differs from the current one;
// conversion body elided in this view.
1406 if (getFormat() != fm) {
1414 * @brief Copy the Tensor
1415 * @param[in] from Tensor to be copied
1417 * @note copy can reshape the tensor to match the shape
1419 void copy(const Tensor &from);
1422 * @brief Copy the Tensor
1423 * @param[in] from Tensor to be copied
1425 void copyData(const Tensor &from);
1428 * @brief Copy the Tensor
1429 * @param[in] from Tensor to be copied
1431 void copy_with_stride(const Tensor &from);
1434 * @brief Get slice of the tensor, sliced by batch
1435 * @param[in] offset offset in batch to start the slice
1436 * @param[in] size size of the slice
1437 * @retval slice of this tensor
1438 * @note This function provides a slice of this tensor, and does not create a
1441 Tensor getBatchSlice(size_t offset, unsigned int size) const;
1444 * @brief Get new tensor which shares memory with current tensor but different
1447 * @param dim new dimension to be set for this tensor
1448 * @param offset offset to be used from the start of the data in elements
1449 * @note The new tensor will share the same data as the current tensor but
1450 * can have different size.
1451 * @note New size added with offset must be less than the size of the original
1454 Tensor getSharedDataTensor(const TensorDim dim, size_t offset,
1455 bool reset_stride = true,
1456 const std::string &name_ = "") const;
1458 * @brief split tensor along axis.
1460 * @param num_size num_size
1462 * @return split tensors
1464 std::vector<Tensor> split(unsigned num_size, int axis = 0);
1467 * @brief split tensor along axis.
1469 * @param sizes sizes
1471 * @return split tensors
1472 * @note if the given array sizes is just a 1 unsigned int value, assumes that
1473 * it divide tensor by given size evenly
1475 std::vector<Tensor> split(std::vector<size_t> sizes, int axis = 0);
1478 * @brief concatenate tensors along axis
1480 * @param tensors tensors to be concatenated to the first tensor
1482 * @return Tensor concatenated tensor
1484 static Tensor cat(const std::vector<Tensor> &tensors, int axis = 0);
1487 * @brief make this tensor share memory with given tensor
1489 * @param src Source tensor whose memory is to be shared
1490 * @param offset offset to be used from the start of the data in bytes
1491 * @note This tensor will share the same data as the current tensor but
1492 * can have different size.
1493 * @note This tensor's size added with offset must be less than the size of
1494 * the source tensor.
1495 * @note The stride of the source tensor and this tensor must be same.
1497 void makeSharedDataTensor(const Tensor &src, size_t offset = 0);
1500 * @brief Convenient wrapper for in-place copy of @a this.
1501 * @retval Copied version of this
1503 Tensor clone() const;
1506 * @brief Save the Tensor into file
1507 * @param[in] file output file stream
1509 void save(std::ostream &file);
1512 * @brief Read the Tensor from file
1513 * @param[in] file input file stream
1515 void read(std::ifstream &file);
1518 * @brief return argument index which value is max by batch
1519 * @retval unsigned int argument index
1521 std::vector<unsigned int> argmax() const;
1524 * @brief return max of the absolute values of the tensor
1525 * @retval maximum absolute value
1527 float max_abs() const;
1530 * @brief return a copy of the Tensor Dim
* @retval copy (not a reference) of the internal TensorDim
1533 TensorDim getDim() const { return TensorDim(dim); }
1536 * @brief return Tensor Dim for a given axis
* @param axis axis to query (0: batch, 1: channel, 2: height, 3: width)
1539 size_t getTensorDim(unsigned int axis);
1542 * @brief return Tensor Type
1544 TensorDim::TensorType getTensorType() const { return dim.getTensorType(); };
1547 * @brief return Tensor batch size
1548 * @retval batch size
1550 size_t batch() const { return dim.batch(); }
1553 * @brief return Tensor channel size
1554 * @retval channel size
1556 size_t channel() const { return dim.channel(); }
1559 * @brief return Tensor height size
1560 * @retval height size
1562 size_t height() const { return dim.height(); }
1565 * @brief return Tensor width size
1566 * @retval width size
1568 size_t width() const { return dim.width(); }
1571 * @brief return Tensor Data Type Size
1572 * @retval data type size
1574 uint getDataTypeSize() const { return dim.getDataTypeSize(); }
1577 * @brief update batch size for this tensor
* @param batch the new batch size to set on this tensor's dimension
1579 * @note The batchsize of src_tensor need not be related with this
1580 * tensor's batch size
1582 * @note The memory for this tensor will re-allocated/re-assigned if the
1583 * updated batch size is different than the current batch size.
1585 * @note If this tensor is/was the src_tensor for some other, then
1586 * reduction in batch size can make the dependent tensors allocate fail due to
1587 * memory smaller. Caller must handle this in their own end.
1589 * @note If this tensor is re-allocated, then the memory might not be
1590 * immediately freed as the tensor already depending on this tensor also
1591 * share the same memory. So, the peak memory consumption in worst case can
1592 * reach the total memory requirements of a model with old batchsize and the
1593 * new batch size. It is recommended to first deallocate all the tensors,
1594 * updateBatch and then allocate again to avoid such issues.
1596 void updateBatch(unsigned int batch) {
// No-op when the batch size is unchanged.
1597 if (dim.batch() == batch) {
// Presumably reached only when memory is already allocated — the guard
// condition is elided in this view; TODO confirm with the full source.
1602 throw std::invalid_argument(
1603 "Cannot update batch for an allocated tensor");
1608 * @brief return Data pointer of Tensor
1609 * @retval template T pointer (float pointer as default)
1611 template <typename T = float> T *getData() {
// Raw buffer address shifted by this tensor's offset (supports shared /
// sliced tensors); a null-data guard is presumably elided in this view.
1616 return (T *)((data->getAddr<T>()) + offset);
1620 * @brief return Data pointer of Tensor
1621 * @retval template T pointer (float pointer as default)
1623 template <typename T = float> const T *getData() const {
1628 return (T *)(data->getAddr<T>() + offset)
1632 * @brief return Data pointer of Tensor
1633 * @retval template T pointer (float pointer as default)
* @param idx element index to offset the returned pointer by
1635 template <typename T = float> T *getData(size_t idx) const {
// NOTE(review): index is scaled by sizeof(T) while the pointer arithmetic
// below appears to be in units of T — verify unit consistency against the
// full source (interior lines are elided here).
1639 size_t index = idx * sizeof(T);
1642 return (T *)(data->getAddr<T>() + offset + index);
* @brief set the data type of this tensor's dimension
1645 void setDataType(Tdatatype d_type) { dim.setDataType(d_type); }
* @brief set the tensor type (format/data type pair) of this tensor
1647 void setTensorType(ml::train::TensorDim::TensorType t_type) {
1648 dim.setTensorType(t_type);
1652 * @brief put data of Tensor
1654 * @note It is only effective when memory_swap is used
1656 void putData() const {
// body elided in this view — presumably releases the MemoryData handle
// back to the swap device; TODO confirm with the full source
1664 * @brief return Data pointer of Tensor
1665 * @retval template T pointer (float pointer as default)
* @retval shared handle to the underlying MemoryData (may be shared
* between tensors)
1667 const std::shared_ptr<MemoryData> getMemoryData() const { return data; }
1670 * @brief return offset
* @retval element offset of this tensor within the shared memory buffer
1672 unsigned int getOffset() const { return offset; }
1675 * @brief i data index
1676 * @retval address of ith data
1679 * @brief set Tensor Dim
1680 * @param[in] d TensorDim
1681 * @note Throws std::invalid_argument if size mismatch
1683 void reshape(const TensorDim &d);
1686 * @brief fill tensor data with current value,
1687 * if dimension is not exactly same, it is a hard error in this function
1688 * so, only stride is overridden to @a this
1690 * @param from Tensor to fill the data from
1691 * @param allocate if unallocated, allocate with from.getDim()
1692 * @throws std::invalid_argument if dimension and stride does not match
1694 void fill(const Tensor &from, bool allocate = false);
1697 * @brief return current stride of tensor.
1698 * @retval int[MAXDIM] strides
1700 const std::array<size_t, TensorDim::MAXDIM> getStrides() const noexcept {
1704 * @brief Get linear index given the n-d index
* @note maps (b, c, h, w) onto the stride array according to the memory
* format; the trailing term of each sum is elided in this view.
1706 inline size_t getIndex(unsigned int b, unsigned int c, unsigned int h,
1707 unsigned int w) const noexcept {
1708 if (getFormat() == Tformat::NCHW) {
1709 return (b * strides[0] + c * strides[1] + h * strides[2] +
// NHWC layout: strides are ordered batch, height, width, channel.
1712 return (b * strides[0] + h * strides[1] + w * strides[2] +
1718 * @brief Check if two given axes are contiguous
* @param n first axis (in NCHW axis numbering)
* @param np1 axis expected to immediately follow @a n in memory
1720 bool checkContinuous(unsigned int n, unsigned int np1) const {
// Memory order of NCHW axis ids when stored as NHWC.
1721 std::vector<unsigned int> continuous_order_nhwc = {0, 3, 1, 2};
1722 bool continuous = false;
1723 if (getFormat() == Tformat::NHWC) {
// axes are contiguous when np1 directly follows n in NHWC memory order
1724 if (continuous_order_nhwc[np1] == continuous_order_nhwc[n] + 1)
1734 * @brief Set name of the tensor
* @param[in] name_ name to assign to the tensor
1738 void setName(const std::string &name_) { name = name_; }
1741 * @brief Get name of the tensor
1743 * @return name of the tensor
1745 const std::string &getName() const { return name; }
1748 * @brief Set the memory buffer for the tensor
1750 * @param buf the memory buffer
* @param off element offset of this tensor within @a buf
1751 * @param init initialize the buffer
1753 void setData(const std::shared_ptr<MemoryData> buf, unsigned int off = 0,
// body elided in this view — presumably stores buf/off and optionally
// runs the initializer; TODO confirm with the full source
1754 bool init = false) {
1767 * @brief Get initializer for the tensor
1769 * @return initializer of the tensor
1771 Tensor::Initializer getInitializer() const { return initializer; }
1774 * @brief Get format for the tensor
1776 * @return format of the tensor
1778 TensorDim::Format getFormat() const { return dim.getFormat(); }
1781 * @brief Get data type for the tensor
1783 * @return data type of the tensor
1785 Tdatatype getDataType() const { return dim.getDataType(); }
1787 static constexpr float epsilon = 1e-5;
1790 /**< handle the data as a std::shared_ptr<float> type */
1792 std::array<size_t, TensorDim::MAXDIM> strides;
1794 Tensor::Initializer initializer;
1795 std::string name; /**< name of the tensor */
1796 std::shared_ptr<MemoryData> data;
1797 unsigned int offset;
1800 * When using shared_data with tensor, this stores the ptr of the source
1801 * tensor which handles the full memory. If tensor data is already allocated,
1802 * this does not affect the tensor. If the tensor data is not allocated, and
1803 * src_ptr is valid, this tensor will use the memory allocated by the src_ptr
1805 std::shared_ptr<SrcSharedTensor> src_tensor;
1807 struct BroadcastInfo;
1810 * @brief Applies the given operator to the tensor with the passed argument
1811 * @param[in] m Tensor
1812 * @param[in] v_func vectorized function to apply
1813 * @param e broadcast info.
1814 * @param cur_axis current axis. pass default when calling outside.
1815 * @param offset offset for this. pass default when calling outside.
1816 * @param m_offset offset for m. pass default when calling outside.
1817 * @retval #ML_ERROR_NONE Successful
1818 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
1821 apply_broadcast_util(Tensor const &m,
1822 std::function<void(const BroadcastInfo &e, const float *,
1823 const float *, float *)>
1825 Tensor &output, const BroadcastInfo &e,
1826 int cur_axis = -1, size_t offset = 0,
1827 size_t m_offset = 0) const;
1829 void apply_broadcast_util(
1831 std::function<void(const BroadcastInfo &e, const __fp16 *, const __fp16 *,
1834 Tensor &output, const BroadcastInfo &e, int cur_axis = -1,
1835 size_t offset = 0, size_t m_offset = 0) const;
1838 * @brief Applies the given operator to the tensor with the passed argument
1840 * @param[in] m Tensor
1841 * @param[in] v_func vectorized function to apply
1842 * @retval #ML_ERROR_NONE Successful
1843 * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
1845 void apply_broadcast(Tensor const &m,
1846 std::function<void(const BroadcastInfo &e, const float *,
1847 const float *, float *)>
1849 Tensor &output) const;
1852 apply_broadcast(Tensor const &m,
1853 std::function<void(const BroadcastInfo &e, const __fp16 *,
1854 const __fp16 *, __fp16 *)>
1856 Tensor &output) const;
1859 * @brief compute Loop info for broadcasting and vectorization
1861 * @param m target tensor to be calculated against.
1862 * @return BroadcastInfo Loopinfo needed to run external loop
1864 BroadcastInfo computeBroadcastInfo(const Tensor &m) const;
1867 * @brief copy a buffer to @a this, the caller has to ensure that @a this is
1868 * initialized otherwise undefined behavior
1870 * @param buf buffer to copy from
1872 void copy(const void *buf);
1875 * @brief Update destination tensor to share memory with source tensor
1877 * @param src src tensor containing the memory
1878 * @param dest destination tensor which will share the memory
1879 * @param offset offset to be used from the start of the data in bytes
1880 * @note The new tensor will share the same data as the current tensor but
1881 * can have different size.
1882 * @note New size added with offset must be less than the size of the original
1885 static void createSharedDataTensor(const Tensor &src, Tensor &dest,
1889 * @brief Reallocate memory for this tensor
1890 * @note This will not necessary free the memory as tensors share memory
1891 * @note This can increase the peak memory consumption when callled on all
1892 * the tensors of a model sequentially. It is advised to first deallocate all
1893 * the tensors and then allocate, than reallocate tensors one by one.
1901 * @brief Merge the given two axis for tensor at second axis inplace
1903 * @param axis1 first axis to merge
1904 * @param axis2 second axis to merge
1906 void mergeAxis(unsigned int axis1, unsigned int axis2);
1909 * @brief rotate 180 dgree
1910 * @param[in] in input Tensor
1911 * @retVal Tensor rotated tensor (180 degree)
1913 Tensor rotate_180(Tensor in);
1915 }; // namespace nntrainer
1918 * @brief Overriding output stream
1920 std::ostream &operator<<(std::ostream &out, Tensor const &m);
1922 typedef std::shared_ptr<Tensor> sharedTensor;
1924 typedef std::shared_ptr<const Tensor> sharedConstTensor;
1926 typedef std::vector<sharedConstTensor> sharedConstTensors;
1928 typedef std::vector<sharedTensor> sharedTensors;
1930 } /* namespace nntrainer */
1932 #endif /* __cplusplus */
1933 #endif /* __TENSOR_H__ */