1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
8 #include "nvcc_defs.hpp"
14 #include "../cxx_utils/resizable_static_array.hpp"
15 #include "../cxx_utils/is_iterator.hpp"
17 #include <opencv2/core.hpp>
21 #include <type_traits>
30 #ifndef CSL_MAX_TENSOR_RANK
31 #define CSL_MAX_TENSOR_RANK 6
34 namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
38 * TYPE | OWNERSHIP | MUTABLE
39 * ------------ + --------- + --------
41 * TensorSpan | No | Yes
42 * TensorView | No | No
44 * Tensor is implicitly convertible to TensorSpan and TensorView
45 * TensorSpan is implicitly convertible to TensorView
47 * Concepts and template parameter naming convention:
48 * - "MutableTensorType" can refer to a Tensor or TensorSpan
49 * - "ImmutableTensorType" can refer to a Tensor, TensorSpan or TensorView
50 * - "TensorType" can refer to a Tensor, TensorSpan or TensorView
52 * "ImmutableTensorType" is used when the tensor data might be used.
53 * "TensorType" is used when only meta-information such as the size or shape is required, i.e. the data won't be touched
/** @brief maps a possibly negative axis index to its non-negative equivalent
 *
 * A negative \p axis counts from the end (-1 selects the last axis) and is translated
 * to `rank + axis`; a non-negative \p axis is returned unchanged.
 *
 * No range check is performed here. If \p axis is smaller than `-rank`, the unsigned
 * subtraction wraps around, so callers must validate the result against the rank.
 */
CUDA4DNN_HOST_DEVICE constexpr std::size_t clamp_axis(int axis, std::size_t rank) {
    /* widen before negating so that the most negative `int` cannot overflow */
    return axis < 0 ? rank - static_cast<std::size_t>(-static_cast<long long>(axis))
                    : static_cast<std::size_t>(axis);
}
61 /** @brief multi-dimensional contiguous non-copyable GPU tensor
63 * \tparam T type of data stored
65 * @note scalars or zero rank tensors are not supported
66 * @note the maximum rank supported is controlled by the `CSL_MAX_TENSOR_RANK` preprocessor symbol
70 static_assert(std::is_standard_layout<T>::value, "T must staisfy StandardLayoutType");
73 using value_type = typename ManagedPtr<T>::element_type;
74 using pointer = typename ManagedPtr<value_type>::pointer;
75 using const_pointer = typename ManagedPtr<value_type>::const_pointer;
76 using size_type = typename ManagedPtr<value_type>::size_type;
79 Tensor(const Tensor&) = delete;
80 Tensor(Tensor&& other) noexcept {
81 data = std::move(other.data);
86 /** @brief constructs a tensor of a specific shape
88 * Whatever arguments are accepted by the resize methods are accepted here.
90 template <class ...Args>
91 Tensor(Args&&... sizes) { resize(std::forward<Args>(sizes)...); }
93 Tensor& operator=(const Tensor&) = delete;
94 Tensor& operator=(Tensor&& other) noexcept {
95 data = std::move(other.data);
101 /** returns true if the tensor is empty (or uninitialized) */
102 bool empty() const noexcept { return shape.size() == 0; }
104 /** returns the total number of elements in the tensor
107 * - tensor must be non-empty
109 size_type size() const noexcept {
111 return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
114 /** returns the rank of the tensor
117 * - tensor must be non-empty
119 size_type rank() const noexcept {
124 /** @brief returns the length of the axis
126 * Every axis is assigned a zero-based index which can be used to select an axis.
127 * Negative index can be used to select an axis from the end.
130 * > -1 represents the last axis
131 * > 0 represents the first axis
132 * > 1 represents the second axis
135 * - tensor must be non-empty
136 * - the axis must be in the range [-rank(), rank())
138 size_type get_axis_size(int axis) const noexcept {
139 axis = clamp_axis(axis, rank());
140 CV_Assert(axis >= 0 && axis < rank());
144 /** @brief returns the combined size of the axes in an axis range
146 * if the shape is [3 x 5 x 7 x 11]
147 * - `size_range(0, 2)` will return 3 x 5 = 15
148 * - `size_range(1, 3)` will return 5 x 7 = 35
149 * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
152 * - tensor must be non-empty
153 * - `axis_start` must be less than or equal to `axis_end`
154 * - `axis_end` must be less than or equal to the rank
156 * returns one if the two `axis_start` and `axis_end` are equal
158 size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
160 CV_Assert(axis_start <= axis_end);
161 CV_Assert(axis_end <= rank());
162 auto start = std::begin(shape) + axis_start;
163 auto end = std::begin(shape) + axis_end;
164 return std::accumulate(start, end, 1, std::multiplies<size_type>());
167 /** returns an std::vector containing axis lengths starting from axis zero
170 * - tensor must be non-empty
172 * Exception Guarantee: Strong
174 std::vector<size_type> shape_as_vector() const {
176 return std::vector<size_type>(std::begin(shape), std::end(shape));
179 /** returns a pointer to mutable device memory owned by the tensor */
180 pointer get() noexcept { return data.get(); }
182 /** returns a pointer to immutable device memory owned by the tensor */
183 const_pointer get() const noexcept { return data.get(); }
185 /** @brief releases the memory owned by the tensor
188 * - tensor must be non-empty
190 * Exception Guarantee: Strong
198 /** @brief resizes the tensor
201 * - [start, end) represents a forward range containing the length of the axes in order starting from axis zero
202 * - number of lengths provided must not exceed the maximum tensor rank (CSL_MAX_TENSOR_RANK)
203 * - the sizes must be positive integers
205 * Exception Guarantee: Strong
207 template <class ForwardItr>
208 typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
209 ::type resize(ForwardItr start, ForwardItr end) {
210 CV_Assert(start != end);
211 CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
213 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
214 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
217 shape.assign(start, end);
220 /** @brief resizes the tensor
221 * constructs a range out of the arguments and invokes the range-based resize method
223 template <class ...Sizes>
224 void resize(Sizes... new_sizes_) {
225 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
226 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
227 std::array<size_type, sizeof...(Sizes)> new_sizes = { static_cast<size_type>(new_sizes_)... };
228 resize(std::begin(new_sizes), std::end(new_sizes));
231 /** @brief resizes the tensor
234 * - the reference tensor must be non-empty
236 * Exception Guarantee: Strong
238 template <class TensorType>
239 void resize_as(const TensorType& tensor) {
240 CV_Assert(!tensor.empty());
241 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
242 for (int i = 0; i < new_sizes.size(); i++)
243 new_sizes[i] = tensor.get_axis_size(i);
244 resize(std::begin(new_sizes), std::end(new_sizes));
247 /** @brief reshapes the tensor
250 * The length of at most one axis can be deduced using the total size constraint. The axis can
251 * be marked for deduction by specifying the size as -1.
253 * The axes for which no size was provided (excluding -1) will be assumed to be one.
256 * - the tensor must be non-empty
257 * - [start, end) represents a forward range containing the length of the axes starting from axis zero
258 * - the number of lengths provided must be less than or equal to the tensor rank
259 * - at most one axis length is allowed for length deduction
260 * - the lengths provided must ensure that the total number of elements remains unchanged
262 * Exception Guarantee: Strong
264 template <class ForwardItr>
265 typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
266 ::type reshape(ForwardItr start, ForwardItr end) {
267 CV_Assert(start != end);
268 CV_Assert(std::distance(start, end) <= rank());
270 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
272 /* the user may leave at most one axis size for deduction by specifying -1 */
273 auto sizes_to_deduce = std::count(start, end, -1);
274 if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
276 /* sizes must be positive numbers with the exception of -1 */
277 auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
278 return !(x > 0 || x == -1);
280 if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
282 /* compute the total number of elements in the new tensor */
283 size_type unknown_size = 0;
284 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
286 /* there is an unknown size */
287 if (std::abs(total) <= size()) {
288 unknown_size = size() / std::abs(total);
291 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
292 ** Since `total` is negative, the size check which follows will fail and throw an error
296 /* the number of elements before and after reshape must be exactly same */
297 if (total != size()) {
298 CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
301 /* we assume the size of the unspecified axes to be one */
302 std::fill(std::begin(shape), std::end(shape), 1);
303 std::copy_backward(start, end, std::end(shape));
305 /* replace the unknown axis with the correct value */
306 std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
309 /** @brief reshapes the tensor
310 * constructs a range out of the arguments and invokes range-based reshape method
312 template <class ...Sizes>
313 void reshape(Sizes... new_sizes_) {
314 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
315 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
316 std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
317 reshape(std::begin(new_sizes), std::end(new_sizes));
320 /** @brief reshapes the tensor
323 * - the reference tensor must be a non-empty tensor
324 * - the reference tensor's rank must be lesser than or equal to the rank of target tensor
326 * Exception Guarantee: Strong
328 template <class TensorType>
329 void reshape_as(const TensorType& tensor) {
330 CV_Assert(!tensor.empty());
331 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
332 for (int i = 0; i < new_sizes.size(); i++)
333 new_sizes[i] = tensor.get_axis_size(i);
334 reshape(std::begin(new_sizes), std::end(new_sizes));
337 /** @brief squeezes the tensor
339 * removes all axes of unit size
342 * - the tensor must be non-empty
343 * - the tensor's rank must be at least two
345 * Exception Guarantee: Strong
349 CV_Assert(rank() >= 2);
350 auto itr = std::remove(std::begin(shape), std::end(shape), 1);
351 shape.resize(itr - std::begin(shape));
354 /** @brief squeezes the tensor
356 * removes the specified axis if the axis length is one; otherwise, ignores the request
359 * - the tensor must be non-empty
360 * - the tensor's rank must be at least two
362 * Exception Guarantee: Strong
364 void squeeze(int axis) {
366 CV_Assert(rank() >= 2);
367 axis = clamp_axis(axis, rank());
368 CV_Assert(axis >= 0 && axis < rank());
369 shape.erase(std::begin(shape) + axis);
372 /** @brief unsqueezes the tensor
374 * adds a axis of unit size at the requested before the specified axis
377 * - the tensor must be non-empty
378 * - the tensor's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
380 * Exception Guarantee: Strong
382 void unsqueeze(int axis = 0) {
384 CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
385 axis = clamp_axis(axis, rank());
386 CV_Assert(axis >= 0 && axis < rank());
387 shape.insert(std::begin(shape) + axis, 1);
390 operator Span<T>() noexcept { return Span<T>(data.get(), size()); }
391 operator View<T>() const noexcept { return View<T>(data.get(), size()); }
393 friend void swap(Tensor& lhs, Tensor& rhs) noexcept {
395 swap(lhs.data, rhs.data);
396 swap(lhs.shape, rhs.shape);
400 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
401 ManagedPtr<value_type> data;
404 /** @brief provides a non-owning mutable span of a Tensor
406 * \tparam T type of data stored by the tensor
408 * A span is valid if and only if the following hold true:
409 * - span is non-empty
410 * - spanned memory is still allocated
412 * A span may be used if and only if it is valid.
417 using value_type = typename Tensor<T>::value_type;
418 using pointer = typename Tensor<T>::pointer;
419 using const_pointer = typename Tensor<T>::const_pointer;
420 using size_type = typename Tensor<T>::size_type;
422 TensorSpan() noexcept : ptr{ nullptr } { }
423 TensorSpan(const TensorSpan&) noexcept = default;
424 TensorSpan(Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
425 const auto rank = tensor.rank();
427 for (int i = 0; i < rank; i++)
428 shape[i] = tensor.get_axis_size(i);
431 template <class ForwardItr>
432 TensorSpan(pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
433 CV_Assert(start != end);
434 CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
436 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
437 if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
438 CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
441 shape.assign(start, end);
444 /** creates a subspan of a tensor (or span); refer to subspan method for more details */
445 template <class... Args>
446 TensorSpan(TensorSpan other, size_type offset, Args&&... args)
447 : TensorSpan(other.subspan(offset, std::forward<Args>(args)...)) { }
449 /** returns true if the span is empty */
450 bool empty() const noexcept { return shape.size() == 0; }
452 /** returns the total number of elements in the span
455 * - span must be non-empty
457 size_type size() const noexcept {
459 return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
462 /** returns the rank of the span
465 * - span must be non-empty
467 size_type rank() const noexcept {
472 /** @brief returns the length of the axis
474 * Every axis is assigned a zero-based index which can be used to select an axis.
475 * Negative index can be used to select an axis from the end.
478 * > -1 represents the last axis
479 * > 0 represents the first axis
480 * > 1 represents the second axis
483 * - span must be non-empty
484 * - the axis must be in the range [-rank(), rank())
486 size_type get_axis_size(int axis) const noexcept {
487 axis = clamp_axis(axis, rank());
488 CV_Assert(axis >= 0 && axis < rank());
492 /** @brief returns the combined size of the axes in an axis range
494 * if the shape is [3 x 5 x 7 x 11]
495 * - `size_range(0, 2)` will return 3 x 5 = 15
496 * - `size_range(1, 3)` will return 5 x 7 = 35
497 * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
500 * - span must be non-empty
501 * - `axis_start` must be less than or equal to `axis_end`
502 * - `axis_end` must be less than or equal to the rank
504 * returns one if the two `axis_start` and `axis_end` are equal
506 size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
508 CV_Assert(axis_start <= axis_end);
509 CV_Assert(axis_end <= rank());
510 auto start = std::begin(shape) + axis_start;
511 auto end = std::begin(shape) + axis_end;
512 return std::accumulate(start, end, 1, std::multiplies<size_type>());
515 /** returns an std::vector containing axis lengths starting from axis zero
518 * - span must be non-empty
520 * Exception Guarantee: Strong
522 std::vector<size_type> shape_as_vector() const {
524 return std::vector<size_type>(std::begin(shape), std::end(shape));
527 /** returns a pointer to mutable device memory */
528 pointer get() const noexcept { return ptr; }
530 /** @brief clears the span
533 * - span must be non-empty
535 * Exception Guarantee: Strong
537 void clear() noexcept {
543 /** @brief reshapes the span
546 * The length of at most one axis can be deduced using the total size constraint. The axis can
547 * be marked for deduction by specifying the corresponding size as -1.
549 * The axes for which no size was provided (excluding -1) will be assumed to be one.
552 * - the span must be non-empty
553 * - [start, end) represents a forward range containing the length of the axes in order
554 * - the number of axis lengths must be less than or equal to the rank
555 * - at most one axis length is allowed for length deduction
556 * - the lengths provided must ensure that the total number of elements remains unchanged
558 * Exception Guarantee: Strong
560 template <class ForwardItr>
561 typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
562 ::type reshape(ForwardItr start, ForwardItr end) {
563 CV_Assert(start != end);
564 CV_Assert(std::distance(start, end) <= rank());
566 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
568 /* the user may leave at most one axis size for deduction by specifying -1 */
569 auto sizes_to_deduce = std::count(start, end, -1);
570 if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
572 /* sizes must be positive numbers with the exception of -1 */
573 auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
574 return !(x > 0 || x == -1);
576 if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
578 /* compute the total number of elements in the new tensor */
579 size_type unknown_size = 0;
580 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
582 /* there is an unknown size */
583 if (std::abs(total) <= size()) {
584 unknown_size = size() / std::abs(total);
587 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
588 ** Since `total` is negative, the size check which follows will fail and throw an error
592 /* the number of elements before and after reshape must be exactly same */
593 if (total != size()) {
594 CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
597 /* we assume the size of the unspecified axes to be one */
598 std::fill(std::begin(shape), std::end(shape), 1);
599 std::copy_backward(start, end, std::end(shape));
601 /* replace the unknown axis with the correct value */
602 std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
605 /** @brief reshapes the tensor
606 * constructs a range out of the arguments and invokes the range-based reshape method
608 template <class ...Sizes>
609 void reshape(Sizes... new_sizes_) {
610 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "unsupported tensor rank");
611 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
612 std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
613 reshape(std::begin(new_sizes), std::end(new_sizes));
616 /** @brief reshapes the span
619 * - the reference tensor/span/view must be non-empty
620 * - the reference tensor/span/view's rank must be less than or equal to the rank of the span
622 * Exception Guarantee: Strong
624 template <class TensorType>
625 void reshape_as(const TensorType& tensor) {
626 CV_Assert(!tensor.empty());
627 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
628 for (int i = 0; i < new_sizes.size(); i++)
629 new_sizes[i] = tensor.get_axis_size(i);
630 reshape(std::begin(new_sizes), std::end(new_sizes));
633 /** @brief squeezes the tensor
635 * removes all axes of unit size
638 * - the span must be non-empty
639 * - the span's rank must be at least two
641 * Exception Guarantee: Strong
645 CV_Assert(rank() >= 2);
646 auto itr = std::remove(std::begin(shape), std::end(shape), 1);
647 shape.resize(itr - std::begin(shape));
650 /** @brief squeezes the tensor
652 * removes the specified axis if the axis length is one; otherwise, ignores the request
655 * - the span must be non-empty
656 * - the span's rank must be at least two
658 * Exception Guarantee: Strong
660 void squeeze(int axis) {
662 CV_Assert(rank() >= 2);
663 axis = clamp_axis(axis, rank());
664 CV_Assert(axis >= 0 && axis < rank());
665 shape.erase(std::begin(shape) + axis);
668 /** @brief unsqueezes the tensor
670 * adds a axis of unit size at the requested before the specified axis
673 * - the span must be non-empty
674 * - the span's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
676 * Exception Guarantee: Strong
678 void unsqueeze(int axis = 0) {
680 CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
681 axis = clamp_axis(axis, rank());
682 CV_Assert(axis >= 0 && axis < rank());
683 shape.insert(std::begin(shape) + axis, 1);
686 /** @brief obtains a subspan of the span
689 * - the span must be non-empty
690 * - the `offset` must be less than the size of the span
691 * - [start, end) represents a forward range containing length of the subspan axes
692 * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
694 * Exception Guarantee: Strong
696 template <class ForwardItr>
697 typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorSpan>
698 ::type subspan(size_type offset, ForwardItr start, ForwardItr end) const {
699 CV_Assert(start != end);
700 CV_Assert(std::distance(start, end) <= rank());
702 auto cur_size = size();
703 CV_Assert(offset < cur_size);
705 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
707 /* sizes must be positive numbers */
708 auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
711 if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
713 /* the number of elements must not exceed what remains of the span after the offset */
714 auto max_size = (cur_size - offset);
715 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
716 if (total > max_size) {
717 CV_Error(Error::StsBadArg, "axis lengths lead to OOB accesses");
721 temp.shape.assign(start, end);
722 temp.ptr = ptr + offset;
726 /** @brief obtains a subspan of the span
727 * constructs a range out of the size arguments and invokes the range-based subspan method
729 template <class ...Sizes>
730 TensorSpan subspan(size_type offset, Sizes... new_sizes_) const {
731 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
732 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
733 std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
734 return subspan(offset, std::begin(new_sizes), std::end(new_sizes));
737 operator Span<T>() noexcept { return Span<T>(ptr, size()); }
738 operator View<T>() const noexcept { return View<T>(ptr, size()); }
740 friend void swap(TensorSpan& lhs, TensorSpan& rhs) noexcept {
742 swap(lhs.ptr, rhs.ptr);
743 swap(lhs.shape, rhs.shape);
747 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
751 /** @brief view of a tensor
753 * \tparam T type of data stored by the tensor
755 * A view is valid if and only if the following hold true:
756 * - view is non-empty
757 * - viewed memory is still allocated
762 using value_type = typename Tensor<T>::value_type;
763 using pointer = typename Tensor<T>::pointer;
764 using const_pointer = typename Tensor<T>::const_pointer;
765 using size_type = typename Tensor<T>::size_type;
767 TensorView() noexcept : ptr{ nullptr } { }
768 TensorView(const TensorView&) noexcept = default;
769 TensorView(TensorSpan<T> other) noexcept : ptr{ other.get() } {
770 const auto rank = other.rank();
772 for (int i = 0; i < rank; i++)
773 shape[i] = other.get_axis_size(i);
775 TensorView(const Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
776 const auto rank = tensor.rank();
778 for (int i = 0; i < rank; i++)
779 shape[i] = tensor.get_axis_size(i);
782 template <class ForwardItr>
783 TensorView(pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
784 CV_Assert(start != end);
785 CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
787 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
788 if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
789 CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
792 shape.assign(start, end);
795 /** creates a subview of a tensor (or span or view); refer to subview method for more details */
796 template <class... Args>
797 TensorView(TensorView other, size_type offset, Args&&... args) noexcept
798 : TensorView(other.subview(offset, std::forward<Args>(args)...)) { }
800 TensorView& operator=(const TensorView&) = default;
801 TensorView& operator=(TensorSpan<T> other) noexcept {
802 TensorView tmp(other);
807 /** returns true if the view is empty */
808 bool empty() const noexcept { return shape.size() == 0; }
810 /** returns the total number of elements in the view
813 * - view must be non-empty
815 size_type size() const noexcept {
817 return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
820 /** returns the rank of the view
823 * - view must be non-empty
825 size_type rank() const noexcept {
830 /** @brief returns the length of the axis
832 * Every axis is assigned a zero-based index which can be used to select an axis.
833 * Negative index can be used to select an axis from the end.
836 * > -1 represents the last axis
837 * > 0 represents the first axis
838 * > 1 represents the second axis
841 * - view must be non-empty
842 * - the axis must be in the range [-rank(), rank())
844 size_type get_axis_size(int axis) const noexcept {
845 axis = clamp_axis(axis, rank());
846 CV_Assert(axis >= 0 && axis < rank());
850 /** @brief returns the combined size of the axes in an axis range
852 * if the shape is [3 x 5 x 7 x 11]
853 * - `size_range(0, 2)` will return 3 x 5 = 15
854 * - `size_range(1, 3)` will return 5 x 7 = 35
855 * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
858 * - view must be non-empty
859 * - `axis_start` must be less than or equal to `axis_end`
860 * - `axis_end` must be less than or equal to the rank
862 * returns one if the two `axis_start` and `axis_end` are equal
864 size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
866 CV_Assert(axis_start <= axis_end);
867 CV_Assert(axis_end <= rank());
868 auto start = std::begin(shape) + axis_start;
869 auto end = std::begin(shape) + axis_end;
870 return std::accumulate(start, end, 1, std::multiplies<size_type>());
873 /** returns an std::vector containing axis lengths starting from axis zero
876 * - view must be non-empty
878 * Exception Guarantee: Strong
880 std::vector<size_type> shape_as_vector() const {
882 return std::vector<size_type>(std::begin(shape), std::end(shape));
885 /** returns a device pointer to immutable device memory */
886 const_pointer get() const noexcept { return ptr; }
888 /** @brief reshapes the view
891 * The length of at most one axis can be deduced using the total size constraint. The axis can
892 * be marked for deduction by specifying the size as -1.
894 * The axes for which no size was provided (excluding -1) will be assumed to be one.
897 * - view must be non-empty
898 * - [start, end) represents a forward range containing length of the axes in order starting from axis zero
899 * - the number of axis lengths must be less than or equal to the tensor rank
900 * - at most one axis length is allowed for length deduction
901 * - the lengths provided must ensure that the total number of elements remains unchanged
903 * Exception Guarantee: Strong
905 template <class ForwardItr>
906 typename std::enable_if<!std::is_integral<ForwardItr>::value, void>
907 ::type reshape(ForwardItr start, ForwardItr end) {
908 CV_Assert(start != end);
909 CV_Assert(std::distance(start, end) <= rank());
911 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
913 /* the user may leave at most one axis size for deduction by specifying -1 */
914 auto sizes_to_deduce = std::count(start, end, -1);
915 if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
917 /* sizes must be positive numbers with the exception of -1 */
918 auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
919 return !(x > 0 || x == -1);
921 if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
923 /* compute the total number of elements in the new tensor */
924 size_type unknown_size = 0;
925 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
927 /* there is an unknown size */
928 if (std::abs(total) <= size()) {
929 unknown_size = size() / std::abs(total);
932 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
933 ** Since `total` is negative, the size check which follows will fail and throw an error
937 /* the number of elements before and after reshape must be exactly same */
938 if (total != size()) {
939 CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
942 /* we assume the size of the unspecified axes to be one */
943 std::fill(std::begin(shape), std::end(shape), 1);
944 std::copy_backward(start, end, std::end(shape));
946 /* replace the unknown axis with the correct value */
947 std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
950 /** @brief reshapes the view
951 * constructs a range out of the arguments and invokes the range-based reshape method
953 template <class ...Sizes>
954 void reshape(Sizes... new_sizes_) {
955 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
956 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
957 std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
958 reshape(std::begin(new_sizes), std::end(new_sizes));
961 /** @brief reshapes the view
964 * - the reference tensor/span/view must be non-empty
965 * - the reference tensor/span/view's rank must be less than or equal to the rank of the view
967 * Exception Guarantee: Strong
969 template <class TensorType>
970 void reshape_as(const TensorType& tensor) {
971 CV_Assert(!tensor.empty());
972 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
973 for (int i = 0; i < new_sizes.size(); i++)
974 new_sizes[i] = tensor.get_axis_size(i);
975 reshape(std::begin(new_sizes), std::end(new_sizes));
978 /** @brief squeezes the tensor
980 * removes all axes of unit size
983 * - the view must be non-empty
984 * - the view's rank must be at least two
986 * Exception Guarantee: Strong
990 CV_Assert(rank() >= 2);
991 auto itr = std::remove(std::begin(shape), std::end(shape), 1);
992 shape.resize(itr - std::begin(shape));
995 /** @brief squeezes the tensor
997 * removes the specified axis if the axis length is one; otherwise, ignores the request
1000 * - the view must be non-empty
1001 * - the view's rank must be at least two
1003 * Exception Guarantee: Strong
1005 void squeeze(int axis) {
1006 CV_Assert(!empty());
1007 CV_Assert(rank() >= 2);
1008 axis = clamp_axis(axis, rank());
1009 CV_Assert(axis >= 0 && axis < rank());
1010 shape.erase(std::begin(shape) + axis);
1013 /** @brief unsqueezes the tensor
1015 * adds a axis of unit size at the requested before the specified axis
1018 * - the view must be non-empty
1019 * - the view's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
1021 * Exception Guarantee: Strong
1023 void unsqueeze(int axis = 0) {
1024 CV_Assert(!empty());
1025 CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
1026 axis = clamp_axis(axis, rank());
1027 CV_Assert(axis >= 0 && axis < rank());
1028 shape.insert(std::begin(shape) + axis, 1);
1031 /** @brief obtains a subview of the view
1033 * The axes for which no size was provided will be assumed to be one.
1036 * - the view must be non-empty
1037 * - the `offset` must be less than the size of the view
1038 * - [start, end) represents a forward range containing length of the subview axes in order
1039 * - the number of axis lengths provided must be less than or equal to the tensor rank
1040 * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
1042 * Exception Guarantee: Strong
1044 template <class ForwardItr>
1045 typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorView>
1046 ::type subview(size_type offset, ForwardItr start, ForwardItr end) const {
1047 CV_Assert(start != end);
1048 CV_Assert(std::distance(start, end) <= rank());
1050 auto cur_size = size();
1051 CV_Assert(offset < cur_size);
1053 using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
1055 /* sizes must be positive numbers */
1056 auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
1059 if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
1061 /* the number of elements must not exceed what remains of the view after the offset */
1062 auto max_size = (cur_size - offset);
1063 auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
1064 if (total > max_size) {
1065 CV_Error(Error::StsBadArg, "axes lengths lead to OOB accesses");
1069 temp.shape.assign(start, end);
1070 temp.ptr = ptr + offset;
1074 /** @brief obtains a subview of the view
1075 * constructs a range out of the size arguments and invokes the range-based subview method
1077 template <class ...Sizes>
1078 TensorView subview(size_type offset, Sizes... new_sizes_) const {
1079 static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
1080 static_assert(sizeof...(Sizes) > 0, "no sizes provided");
1081 std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
1082 return subview(offset, std::begin(new_sizes), std::end(new_sizes));
1085 operator View<T>() const noexcept { return View<T>(ptr, size()); }
1087 friend void swap(TensorView& lhs, TensorView& rhs) noexcept {
1089 swap(lhs.ptr, rhs.ptr);
1090 swap(lhs.shape, rhs.shape);
1094 cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
1098 /** returns true if the two TensorType objects have the same shape */
1099 template <class TensorType1, class TensorType2>
1100 bool is_shape_same(const TensorType1& x, const TensorType2& y) noexcept {
1101 auto rank1 = x.rank();
1102 auto rank2 = y.rank();
1107 for (int i = 0; i < rank1; i++)
1108 if (x.get_axis_size(i) != y.get_axis_size(i))
1113 /** returns true if the two TensorType objects are compatible */
1114 template <class TensorType1, class TensorType2>
1115 bool is_shape_compatible(const TensorType1& x, const TensorType2& y) noexcept {
1116 const auto rank1 = x.rank();
1117 const auto rank2 = y.rank();
1119 /* mathematically not required but technically required */
1123 for (int i = 0; i < rank1; i++)
1124 if (x.get_axis_size(i) != y.get_axis_size(i) &&
1125 x.get_axis_size(i) != 1 && y.get_axis_size(i) != 1)
/** returns the rank to which the given tensor can be squeezed to
 *
 * Only leading axes are considered: starting from axis zero, every axis of unit length
 * lowers the result by one, and the scan stops at the first axis whose length is not
 * one. A tensor whose axes are all of unit length therefore yields zero.
 */
template <class TensorType>
std::size_t get_effective_rank(const TensorType& x) noexcept {
    const auto rank = x.rank();
    auto effective_rank = rank;
    /* get_axis_size is called with an `int` axis, hence the `int` loop bound */
    const int axis_count = static_cast<int>(rank);
    for (int i = 0; i < axis_count; i++) {
        if (x.get_axis_size(i) != 1)
            break;
        effective_rank--;
    }
    /* single exit point, so the all-unit case also returns a value */
    return effective_rank;
}
1141 }}}} /* namespace cv::dnn::cuda4dnn::csl */
1143 #endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP */