2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Copyright (c) 2016, 2017 ARM Limited.
20 * SPDX-License-Identifier: MIT
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43 #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
44 /** Perform reduce max/min
46 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
48 * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
50 * @attention Operation type(code) specifying which operation to perform should be passed as
51 * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
53 * @param[in] input_ptr Pointer to the source image. Supported data
54 * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
55 * @param[in] input_stride_x Stride of the source image in X dimension (in
57 * @param[in] input_step_x input_stride_x * number of elements along X
58 * processed per workitem(in bytes)
59 * @param[in] input_stride_y Stride of the source image in Y dimension (in
61 * @param[in] input_step_y input_stride_y * number of elements along Y
62 * processed per workitem(in bytes)
63 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
65 * @param[in] input_step_z input_stride_z * number of elements along Z
66 * processed per workitem(in bytes)
67 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
69 * @param[in] input_stride_w Stride of the source tensor in W dimension (in
71 * @param[in] input_step_w output_stride_w * number of elements along W
72 * processed per workitem(in bytes)
73 * @param[out] output_ptr Pointer to the destination image. Supported data
74 * types: same as @p input_ptr
75 * @param[in] output_stride_x Stride of the destination image in X dimension
77 * @param[in] output_step_x output_stride_x * number of elements along X
78 * processed per workitem(in bytes)
79 * @param[in] output_stride_y Stride of the destination image in Y dimension
81 * @param[in] output_step_y output_stride_y * number of elements along Y
82 * processed per workitem(in bytes)
83 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
85 * @param[in] output_step_z output_stride_z * number of elements along Z
86 * processed per workitem(in bytes)
87 * @param[in] output_stride_w Stride of the source tensor in W dimension (in
89 * @param[in] output_step_w output_stride_w * number of elements along W
90 * processed per workitem(in bytes)
91 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
93 * @param[in] axis Axis through which reduction occurs
94 * @param[in] dim Dimension across the axis to be reduced.
96 __kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
97 const int axis, const int dim)
99 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
100 Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
103 get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
104 get_global_id(2) / DEPTH_OUT,
108 *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
109 for (int i = 1; i < dim; ++i)
113 #if OP_CODE == 1 // REDUCE_MAX
114 value = max(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
115 indices[2], indices[3])));
117 #elif OP_CODE == 2 // REDUCE_MIN
118 value = min(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
119 indices[2], indices[3])));
121 #else // OP NOT SUPPORTED
127 *((__global DATA_TYPE *)out.ptr) = value;
130 /** Perform reduce sum/mean
132 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
134 * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
135 * e.g. -DDEPTH_OUT=16
136 * @attention Operation type(code) specifying which operation to perform should be passed as
137 * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
139 * @param[in] input_ptr Pointer to the source image. Supported data
140 * types: U8/S8/U16/S16/F16/U32/S32/F32
141 * @param[in] input_stride_x Stride of the source image in X dimension (in
143 * @param[in] input_step_x input_stride_x * number of elements along X
144 * processed per workitem(in bytes)
145 * @param[in] input_stride_y Stride of the source image in Y dimension (in
147 * @param[in] input_step_y input_stride_y * number of elements along Y
148 * processed per workitem(in bytes)
149 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
151 * @param[in] input_step_z input_stride_z * number of elements along Z
152 * processed per workitem(in bytes)
153 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
155 * @param[in] input_stride_w Stride of the source tensor in W dimension (in
157 * @param[in] input_step_w output_stride_w * number of elements along W
158 * processed per workitem(in bytes)
159 * @param[out] output_ptr Pointer to the destination image. Supported data
160 * types: same as @p input_ptr
161 * @param[in] output_stride_x Stride of the destination image in X dimension
163 * @param[in] output_step_x output_stride_x * number of elements along X
164 * processed per workitem(in bytes)
165 * @param[in] output_stride_y Stride of the destination image in Y dimension
167 * @param[in] output_step_y output_stride_y * number of elements along Y
168 * processed per workitem(in bytes)
169 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
171 * @param[in] output_step_z output_stride_z * number of elements along Z
172 * processed per workitem(in bytes)
173 * @param[in] output_stride_w Stride of the source tensor in W dimension (in
175 * @param[in] output_step_w output_stride_w * number of elements along W
176 * processed per workitem(in bytes)
177 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
179 * @param[in] axis Axis through which reduction occurs
180 * @param[in] dim Dimension across the axis to be reduced.
182 __kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
183 const int axis, const int dim)
185 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
186 Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
189 get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
190 get_global_id(2) / DEPTH_OUT,
193 DATA_TYPE sum_value = (DATA_TYPE)0;
194 for (int i = 0; i < dim; ++i)
198 (__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
201 #if OP_CODE == 3 // REDUCE_SUM
202 *((__global DATA_TYPE *)out.ptr) = sum_value;
204 #elif OP_CODE == 4 // REDUCE_MEAN
205 *((__global DATA_TYPE *)out.ptr) = sum_value / CONVERT(dim, DATA_TYPE);
207 #else // OP NOT SUPPORTED
212 #endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)