compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * Copyright (c) 2018-2020 ARM Limited.
  19  *
  20  * SPDX-License-Identifier: MIT
  21  *
  22  * Permission is hereby granted, free of charge, to any person obtaining a copy
  23  * of this software and associated documentation files (the "Software"), to
  24  * deal in the Software without restriction, including without limitation the
  25  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  26  * sell copies of the Software, and to permit persons to whom the Software is
  27  * furnished to do so, subject to the following conditions:
  28  *
  29  * The above copyright notice and this permission notice shall be included in all
  30  * copies or substantial portions of the Software.
  31  *
  32  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  38  * SOFTWARE.
  39  */
  40 #include "helpers.h"
  41
  42 #if defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z)
  43
  44 /** Performs the OneHot operation along the chosen axis
  45  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
  46  * -DDATA_TYPE=short
  47  * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1
  48  * @attention Output tensor depth should be given as a preprocessor argument using
  49  * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16
  50  * @attention Input tensor depth should be given as a preprocessor argument using
  51  * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16
  52  *
  53  *
  54  * @param[in]  indices_ptr                              Pointer to the source tensor. Supported data
  55  * types: S32
  56  * @param[in]  indices_stride_x                         Stride of the source tensor in X dimension
  57  * (in bytes)
  58  * @param[in]  indices_step_x                           indices_stride_x * number of elements along
  59  * X processed per work item (in bytes)
  60  * @param[in]  indices_stride_y                         Stride of the source tensor in Y dimension
  61  * (in bytes)
  62  * @param[in]  indices_step_y                           indices_stride_y * number of elements along
  63  * Y processed per work item (in bytes)
  64  * @param[in]  indices_stride_z                         Stride of the source tensor in Y dimension
  65  * (in bytes)
  66  * @param[in]  indices_step_z                           indices_stride_z * number of elements along
  67  * Z processed per work item (in bytes)
  68  * @param[in]  indices_offset_first_element_in_bytes    Offset of the first element in the source
  69  * tensor
  70  * @param[in]  on_value_ptr                             Pointer to the on_value vector. Supported
  71  * data types: U8/S8/U16/S16/F16/U32/S32/F32.
  72  * @param[in]  on_value_stride_x                        Stride of the on_value vector in X dimension
  73  * (in bytes)
  74  * @param[in]  on_value_step_x                          on_value_stride_x * number of elements along
  75  * X processed per work item (in bytes)
  76  * @param[in]  on_value_offset_first_element_in_bytes   Offset of the first element in the on_value
  77  * vector
  78  * @param[in]  off_value_ptr                            Pointer to the off_value vector. Supported
  79  * data types: Same as @p on_value.
  80  * @param[in]  off_value_stride_x                       Stride of the off_value vector in X
  81  * dimension (in bytes)
  82  * @param[in]  off_value_step_x                         off_value_stride_x * number of elements
  83  * along X processed per work item (in bytes)
  84  * @param[in]  off_value_offset_first_element_in_bytes  Offset of the first element in the off_value
  85  * vector
  86  * @param[out] output_ptr                               Pointer to the destination tensor. Supported
  87  * data types: same as @p on_value
  88  * @param[in]  output_stride_x                          Stride of the destination tensor in X
  89  * dimension (in bytes)
  90  * @param[in]  output_step_x                            output_stride_x * number of elements along X
  91  * processed per work item (in bytes)
  92  * @param[in]  output_stride_y                          Stride of the destination tensor in Y
  93  * dimension (in bytes)
  94  * @param[in]  output_step_y                            output_stride_y * number of elements along Y
  95  * processed per work item (in bytes)
  96  * @param[in]  output_stride_z                          Stride of the destination tensor in Z
  97  * dimension (in bytes)
  98  * @param[in]  output_step_z                            output_stride_z * number of elements along Z
  99  * processed per work item (in bytes)
 100  * @param[in]  output_stride_w                          Stride of the destination tensor in W
 101  * dimension (in bytes)
 102  * @param[in]  output_step_w                            output_stride_w * number of elements along W
 103  * processed per work item (in bytes)
 104  * @param[in]  output_offset_first_element_in_bytes     Offset of the first element in the
 105  * destination tensor
 106  */
 107 __kernel void one_hot(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
 108                       VECTOR_DECLARATION(off_value), TENSOR4D_DECLARATION(output))
 109 {
 110   const int px = get_global_id(0);
 111   const int py = get_global_id(1);
 112   const int pz = get_global_id(2) % OUTPUT_DIM_Z;
 113   const int pw = get_global_id(2) / OUTPUT_DIM_Z;
 114
 115   const Tensor3D indices = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
 116   Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z);
 117
 118 #if AXIS == 0
 119   const int index = *(__global const int *)tensor3D_offset(&indices, py, pz, pw);
 120   *(__global DATA_TYPE *)output.ptr = index == px ? *((__global const DATA_TYPE *)on_value_ptr)
 121                                                   : *((__global const DATA_TYPE *)off_value_ptr);
 122 #elif AXIS == 1
 123   const uint index = *(__global const uint *)tensor3D_offset(&indices, px, pz, pw);
 124   *(__global DATA_TYPE *)output.ptr = index == py ? *((__global const DATA_TYPE *)on_value_ptr)
 125                                                   : *((__global const DATA_TYPE *)off_value_ptr);
 126 #elif AXIS == 2
 127   const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, pw);
 128   *(__global DATA_TYPE *)output.ptr = index == pz ? *((__global const DATA_TYPE *)on_value_ptr)
 129                                                   : *((__global const DATA_TYPE *)off_value_ptr);
 130 #elif AXIS == 3
 131   const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, pz);
 132   *(__global DATA_TYPE *)output.ptr = index == pw ? *((__global const DATA_TYPE *)on_value_ptr)
 133                                                   : *((__global const DATA_TYPE *)off_value_ptr);
 134 #endif // AXIS
 135 }
 136
 137 /** Performs the OneHot operation along the chosen axis as off_value being zero
 138  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
 139  * -DDATA_TYPE=short
 140  * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1
 141  * @attention Output tensor depth should be given as a preprocessor argument using
 142  * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16
 143  * @attention Input tensor depth should be given as a preprocessor argument using
 144  * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16
 145  *
 146  *
 147  * @param[in]  indices_ptr                              Pointer to the source tensor. Supported data
 148  * types: S32
 149  * @param[in]  indices_stride_x                         Stride of the source tensor in X dimension
 150  * (in bytes)
 151  * @param[in]  indices_step_x                           indices_stride_x * number of elements along
 152  * X processed per work item (in bytes)
 153  * @param[in]  indices_stride_y                         Stride of the source tensor in Y dimension
 154  * (in bytes)
 155  * @param[in]  indices_step_y                           indices_stride_y * number of elements along
 156  * Y processed per work item (in bytes)
 157  * @param[in]  indices_stride_z                         Stride of the source tensor in Y dimension
 158  * (in bytes)
 159  * @param[in]  indices_step_z                           indices_stride_z * number of elements along
 160  * Z processed per work item (in bytes)
 161  * @param[in]  indices_offset_first_element_in_bytes    Offset of the first element in the source
 162  * tensor
 163  * @param[in]  on_value_ptr                             Pointer to the on_value vector. Supported
 164  * data types: U8/S8/U16/S16/F16/U32/S32/F32.
 165  * @param[in]  on_value_stride_x                        Stride of the on_value vector in X dimension
 166  * (in bytes)
 167  * @param[in]  on_value_step_x                          on_value_stride_x * number of elements along
 168  * X processed per work item (in bytes)
 169  * @param[in]  on_value_offset_first_element_in_bytes   Offset of the first element in the on_value
 170  * vector
 171  * @param[out] output_ptr                               Pointer to the destination tensor. Supported
 172  * data types: same as @p on_value
 173  * @param[in]  output_stride_x                          Stride of the destination tensor in X
 174  * dimension (in bytes)
 175  * @param[in]  output_step_x                            output_stride_x * number of elements along X
 176  * processed per work item (in bytes)
 177  * @param[in]  output_stride_y                          Stride of the destination tensor in Y
 178  * dimension (in bytes)
 179  * @param[in]  output_step_y                            output_stride_y * number of elements along Y
 180  * processed per work item (in bytes)
 181  * @param[in]  output_stride_z                          Stride of the destination tensor in Z
 182  * dimension (in bytes)
 183  * @param[in]  output_step_z                            output_stride_z * number of elements along Z
 184  * processed per work item (in bytes)
 185  * @param[in]  output_stride_w                          Stride of the destination tensor in W
 186  * dimension (in bytes)
 187  * @param[in]  output_step_w                            output_stride_w * number of elements along W
 188  * processed per work item (in bytes)
 189  * @param[in]  output_offset_first_element_in_bytes     Offset of the first element in the
 190  * destination tensor
 191  */
 192 __kernel void one_hot_only_on_value(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
 193                                     TENSOR4D_DECLARATION(output))
 194 {
 195   const int px = get_global_id(0);
 196   const int py = get_global_id(1);
 197   const int pz = get_global_id(2);
 198
 199   const Tensor3D indices = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
 200   const Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, OUTPUT_DIM_Z);
 201
 202   const int index = *(__global const int *)tensor3D_offset(&indices, px, py, pz);
 203
 204   if (index < 0 || index >= DEPTH)
 205     return;
 206
 207 #if AXIS == 0
 208   *(__global DATA_TYPE *)tensor4D_offset(&output, index, px, py, pz) =
 209       *((__global const DATA_TYPE *)on_value_ptr);
 210 #elif AXIS == 1
 211   *(__global DATA_TYPE *)tensor4D_offset(&output, px, index, py, pz) =
 212       *((__global const DATA_TYPE *)on_value_ptr);
 213 #elif AXIS == 2
 214   *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, index, pz) =
 215       *((__global const DATA_TYPE *)on_value_ptr);
 216 #elif AXIS == 3
 217   *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, pz, index) =
 218       *((__global const DATA_TYPE *)on_value_ptr);
 219 #endif // AXIS
 220 }
 221
 222 #endif // defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z)