/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "arm_compute/core/AccessWindowTranspose.h"
26 #include "arm_compute/core/Helpers.h"
27 #include "arm_compute/core/TensorInfo.h"
28 #include "arm_compute/core/Window.h"
30 using namespace arm_compute;
32 void AccessWindowTranspose::set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size)
34 Coordinates &anchor = input_valid_region.anchor;
35 Coordinates old_anchor(anchor);
36 TensorShape &shape = input_valid_region.shape;
40 border_size = BorderSize(0);
43 // Start of the valid region is equal to the start of the window. But it
44 // cannot be less than the start of the input's valid region plus the border
45 // size required by this kernel (if undefined).
46 // Additionally the valid region is shifted by the offset that is used by
47 // the kernel to write back output values.
48 // Note that because the class can handle skewed transpose operations all
49 // size have to be scaled.
50 anchor.set(0, std::max<int>(DIV_CEIL(window.y().start(), window.y().step()) * _width, DIV_CEIL(anchor[1] + border_size.left, window.y().step()) * _width) + _x);
51 anchor.set(1, std::max<int>(DIV_CEIL(window.x().start(), window.x().step()) * _height, DIV_CEIL(anchor[0] + border_size.top, window.x().step()) * _height) + _y);
53 // End of the valid region is equal to the start of the last write of the
54 // kernel plus the number of written elements. (This assumes that all
55 // written elements are valid). Nevertheless the end cannot be larger than
56 // the end of the input's valid region minus the border size.
57 // Note: not the end points of the region are stored but its size. Thus the
58 // old size is first converted into end points to compared against the
59 // execution window. Afterwards the new end points are converted back into
60 // a size of the region.
61 // Note that because the class can handle skewed transpose operations all
62 // size have to be scaled.
63 shape.set(0, std::min<int>(((old_anchor[0] + shape[1] - border_size.right) / window.y().step()) * _width, (window.y().end() / window.y().step()) * _width));
64 shape.set(1, std::min<int>(((old_anchor[1] + shape[0] - border_size.bottom) / window.x().step()) * _height, (window.x().end() / window.x().step()) * _height));
66 // For higher dimensions use the intersection of the window size and the
67 // valid region of the input
68 for(size_t d = 2; d < _info->num_dimensions(); ++d)
70 anchor.set(d, std::max(window[d].start(), input_valid_region.anchor[d]));
71 shape.set(d, std::min<int>(window[d].end(), input_valid_region.shape[d]) - anchor[d]);
74 _info->set_valid_region(input_valid_region);
77 bool AccessWindowTranspose::update_window_if_needed(Window &window) const
79 // Only update the window size if we can't use padding
80 if(_info == nullptr || _info->is_resizable())
85 const TensorShape &shape = _info->tensor_shape();
86 const Strides &strides = _info->strides_in_bytes();
87 const size_t offset_first_element = _info->offset_first_element_in_bytes();
89 bool window_modified = false;
93 // Transpose and scale according to the number ratio between processed elements in input and output
94 const int min_y = (window.x().start() / window.x().step()) * _height + _y;
95 const int max_y = (window.x().end() / window.x().step()) * _height + _y;
97 // Adjust window start for output's Y dimension (so X in (input) window)
100 // Calculate rows available above the tensor
101 const int front_pad_y_available = -offset_first_element / strides[1];
103 if(min_y < front_pad_y_available)
105 // Not enough padding available, need to shrink the window
106 const int start = ((adjust_up(min_y, front_pad_y_available, _height) - _y) / _height) * window.x().step();
108 window.set(0, Window::Dimension(start, window.x().end(), window.x().step()));
109 window_modified = true;
112 // Update front padding with reconstructed value
113 front_pad_y = std::max(0, -(window.x().start() / window.x().step()) * _height - _y);
116 // Adjust window end for Y dimension
117 if(max_y > static_cast<int>(shape[1]))
119 const int stride_z = _info->num_dimensions() > 2 ? strides[2] : _info->total_size();
121 // Calculate rows available below the tensor
122 const int tail_pad_y_available = (stride_z / strides[1]) - shape[1] - front_pad_y;
124 if(static_cast<int>(shape[1]) + tail_pad_y_available < max_y)
126 // Not enough padding available, need to shrink the window
127 const int end = ((adjust_down(max_y, shape[1] + tail_pad_y_available, _height) - _y) / _height) * window.x().step();
128 window.set(0, Window::Dimension(window.x().start(), end, window.x().step()));
129 window_modified = true;
135 // Transpose and scale according to the number ratio between processed elements in input and output
136 const int min_x = (window.y().start() / window.y().step()) * _width + _x;
137 const int max_x = (window.y().end() / window.y().step()) * _width + _x;
139 const int stride_y = _info->num_dimensions() > 1 ? strides[1] : _info->total_size();
141 // Adjust window start for X dimension
144 const int front_pad_x_available = -std::min<int>(static_cast<int>(offset_first_element) - front_pad_y * strides[1], stride_y - shape[0] * strides[0]) / static_cast<int>(strides[0]);
146 if(min_x < front_pad_x_available)
148 // Not enough padding available, need to shrink the window
149 const int start = ((adjust_up(min_x, front_pad_x_available, _width) - _x) / _width) * window.y().step();
150 window.set(1, Window::Dimension(start, window.y().end(), window.y().step()));
151 window_modified = true;
154 // Update front padding with reconstructed value
155 front_pad_x = std::max(0, -(window.y().start() / window.y().step()) * _width - _x);
158 // Adjust window end for X dimension
159 if(max_x > static_cast<int>(shape[0]))
161 const int tail_pad_x_available = (stride_y / strides[0]) - shape[0] - front_pad_x;
163 if(static_cast<int>(shape[0]) + tail_pad_x_available < max_x)
165 // Not enough padding available, need to shrink the window
166 const int end = ((adjust_down(max_x, shape[0] + tail_pad_x_available, _width) - _x) / _width) * window.y().step();
167 window.set(1, Window::Dimension(window.y().start(), end, window.y().step()));
168 window_modified = true;
172 return window_modified;
175 bool AccessWindowTranspose::update_padding_if_needed(const Window &window) const
177 // Only update the padding if the tensor allows it
178 if(_info == nullptr || !_info->is_resizable())
183 ARM_COMPUTE_ERROR_ON(window.y().step() == 0);
184 ARM_COMPUTE_ERROR_ON(window.x().step() == 0);
186 const int min_x = (window.y().start() / window.y().step()) * _width + _x;
187 const int max_x = (window.y().end() / window.y().step()) * _width + _x;
188 const int min_y = (window.x().start() / window.y().step()) * _height + _y;
189 const int max_y = (window.x().end() / window.x().step()) * _height + _y;
191 const TensorShape &shape = _info->tensor_shape();
194 padding.left = std::max(0, -min_x);
195 padding.right = std::max<int>(0, max_x - shape[0]);
196 padding.top = shape.num_dimensions() == 1 ? 0 : std::max(0, -min_y);
197 padding.bottom = shape.num_dimensions() == 1 ? 0 : std::max<int>(0, max_y - shape[1]);
199 // Update strides in tensor info
200 return _info->extend_padding(padding);