2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Copyright (c) 2016, 2017 ARM Limited.
20 * SPDX-License-Identifier: MIT
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43 #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
44 /** Perform reduce max/min
46 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
48 * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
50 * @attention Operation type(code) specifying which operation to perform should be passed as
51 * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
53 * @param[in] input_ptr Pointer to the source image. Supported data
54 * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
55 * @param[in] input_stride_x Stride of the source image in X dimension (in
57 * @param[in] input_step_x input_stride_x * number of elements along X
58 * processed per workitem(in bytes)
59 * @param[in] input_stride_y Stride of the source image in Y dimension (in
61 * @param[in] input_step_y input_stride_y * number of elements along Y
62 * processed per workitem(in bytes)
63 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
65 * @param[in] input_step_z input_stride_z * number of elements along Z
66 * processed per workitem(in bytes)
67 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
69 * @param[in] input_stride_w Stride of the source tensor in W dimension (in
71 * @param[in] input_step_w output_stride_w * number of elements along W
72 * processed per workitem(in bytes)
73 * @param[out] output_ptr Pointer to the destination image. Supported data
74 * types: same as @p input_ptr
75 * @param[in] output_stride_x Stride of the destination image in X dimension
77 * @param[in] output_step_x output_stride_x * number of elements along X
78 * processed per workitem(in bytes)
79 * @param[in] output_stride_y Stride of the destination image in Y dimension
81 * @param[in] output_step_y output_stride_y * number of elements along Y
82 * processed per workitem(in bytes)
83 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
85 * @param[in] output_step_z output_stride_z * number of elements along Z
86 * processed per workitem(in bytes)
87 * @param[in] output_stride_w Stride of the source tensor in W dimension (in
89 * @param[in] output_step_w output_stride_w * number of elements along W
90 * processed per workitem(in bytes)
91 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
93 * @param[in] axis Axis through which reduction occurs
94 * @param[in] dim Dimension across the axis to be reduced.
96 __kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
97 const int axis, const int dim)
99 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
100 Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
105 get_global_id(2) % DEPTH_OUT,
106 get_global_id(2) / DEPTH_OUT,
110 *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
111 for (int i = 1; i < dim; ++i)
115 #if OP_CODE == 1 // REDUCE_MAX
116 value = max(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
117 indices[2], indices[3])));
119 #elif OP_CODE == 2 // REDUCE_MIN
120 value = min(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
121 indices[2], indices[3])));
123 #else // OP NOT SUPPORTED
129 *((__global DATA_TYPE *)out.ptr) = value;
132 /** Perform reduce sum/mean
134 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
136 * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
137 * e.g. -DDEPTH_OUT=16
138 * @attention Operation type(code) specifying which operation to perform should be passed as
139 * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
141 * @param[in] input_ptr Pointer to the source image. Supported data
142 * types: U8/S8/U16/S16/F16/U32/S32/F32
143 * @param[in] input_stride_x Stride of the source image in X dimension (in
145 * @param[in] input_step_x input_stride_x * number of elements along X
146 * processed per workitem(in bytes)
147 * @param[in] input_stride_y Stride of the source image in Y dimension (in
149 * @param[in] input_step_y input_stride_y * number of elements along Y
150 * processed per workitem(in bytes)
151 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
153 * @param[in] input_step_z input_stride_z * number of elements along Z
154 * processed per workitem(in bytes)
155 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
157 * @param[in] input_stride_w Stride of the source tensor in W dimension (in
159 * @param[in] input_step_w output_stride_w * number of elements along W
160 * processed per workitem(in bytes)
161 * @param[out] output_ptr Pointer to the destination image. Supported data
162 * types: same as @p input_ptr
163 * @param[in] output_stride_x Stride of the destination image in X dimension
165 * @param[in] output_step_x output_stride_x * number of elements along X
166 * processed per workitem(in bytes)
167 * @param[in] output_stride_y Stride of the destination image in Y dimension
169 * @param[in] output_step_y output_stride_y * number of elements along Y
170 * processed per workitem(in bytes)
171 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
173 * @param[in] output_step_z output_stride_z * number of elements along Z
174 * processed per workitem(in bytes)
175 * @param[in] output_stride_w Stride of the source tensor in W dimension (in
177 * @param[in] output_step_w output_stride_w * number of elements along W
178 * processed per workitem(in bytes)
179 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
181 * @param[in] axis Axis through which reduction occurs
182 * @param[in] dim Dimension across the axis to be reduced.
184 __kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
185 const int axis, const int dim)
187 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
188 Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
193 get_global_id(2) % DEPTH_OUT,
194 get_global_id(2) / DEPTH_OUT,
197 DATA_TYPE sum_value = (DATA_TYPE)0;
198 for (int i = 0; i < dim; ++i)
202 *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
205 #if OP_CODE == 3 // REDUCE_SUM
206 *((__global DATA_TYPE *)out.ptr) = sum_value;
208 #elif OP_CODE == 4 // REDUCE_MEAN
209 *((__global DATA_TYPE *)out.ptr) = sum_value / CONVERT(dim, DATA_TYPE);
211 #else // OP NOT SUPPORTED
216 #endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)