2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Copyright (c) 2017 ARM Limited.
20 * SPDX-License-Identifier: MIT
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)
/** Perform embedding_lookup of input tensor
 *
 * Each work-item reads one VEC_SIZE-wide vector from the input tensor and stores it at the
 * work-item's position in the output tensor. The input coordinate along the lookup dimension
 * (selected by NUM_DIMS, 1 = X ... 4 = W) is taken from the lookups vector; the coordinates
 * along the remaining dimensions are derived from the global work-item ids.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
 *       -DDATA_TYPE=short
 * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
 *            -DVEC_SIZE=16
 * @attention Output tensor depth should be given as a preprocessor argument using
 *            -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16
 * @attention Number of input dimensions are passed as a preprocessor argument using
 *            -DNUM_DIMS=size, e.g. -DNUM_DIMS=4
 *
 * @param[in]  input_ptr                             Pointer to the source tensor. Supported data
 *                                                   types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
 * @param[in]  input_stride_x                        Stride of the source tensor in X dimension (in
 *                                                   bytes)
 * @param[in]  input_step_x                          input_stride_x * number of elements along X
 *                                                   processed per workitem(in bytes)
 * @param[in]  input_stride_y                        Stride of the source tensor in Y dimension (in
 *                                                   bytes)
 * @param[in]  input_step_y                          input_stride_y * number of elements along Y
 *                                                   processed per workitem(in bytes)
 * @param[in]  input_stride_z                        Stride of the source tensor in Z dimension (in
 *                                                   bytes)
 * @param[in]  input_step_z                          input_stride_z * number of elements along Z
 *                                                   processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes   The offset of the first element in the source
 *                                                   tensor
 * @param[in]  input_stride_w                        Stride of the source tensor in W dimension (in
 *                                                   bytes)
 * @param[in]  input_step_w                          input_stride_w * number of elements along W
 *                                                   processed per workitem(in bytes)
 * @param[out] output_ptr                            Pointer to the destination tensor. Supported
 *                                                   data types: same as @p input_ptr
 * @param[in]  output_stride_x                       Stride of the destination tensor in X dimension
 *                                                   (in bytes)
 * @param[in]  output_step_x                         output_stride_x * number of elements along X
 *                                                   processed per workitem(in bytes)
 * @param[in]  output_stride_y                       Stride of the destination tensor in Y dimension
 *                                                   (in bytes)
 * @param[in]  output_step_y                         output_stride_y * number of elements along Y
 *                                                   processed per workitem(in bytes)
 * @param[in]  output_stride_z                       Stride of the destination tensor in Z dimension
 *                                                   (in bytes)
 * @param[in]  output_step_z                         output_stride_z * number of elements along Z
 *                                                   processed per workitem(in bytes)
 * @param[in]  output_stride_w                       Stride of the destination tensor in W dimension
 *                                                   (in bytes)
 * @param[in]  output_step_w                         output_stride_w * number of elements along W
 *                                                   processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes  The offset of the first element in the
 *                                                   destination tensor
 * @param[in]  lookups_ptr                           Pointer to the lookups vector. Entries are
 *                                                   read as 32-bit signed integers (int)
 * @param[in]  lookups_stride_x                      Stride of the lookups vector in X dimension (in
 *                                                   bytes)
 * @param[in]  lookups_step_x                        lookups_stride_x * number of elements along X
 *                                                   processed per workitem(in bytes)
 * @param[in]  lookups_offset_first_element_in_bytes The offset of the first element in the lookups
 *                                                   vector
 */
__kernel void embedding_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
                               VECTOR_DECLARATION(lookups))
{
  Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
  Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT);

  Vector lups = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups);

  // Input coordinates (x, y, z, w). Only the dimension selected by NUM_DIMS is read from the
  // lookups vector; every other coordinate follows the work-item's own global id.
  int lup_id[4] = {0};

  lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
                              : get_global_id(0);
  lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
                              : get_global_id(1);
  // Global id 2 carries both z and w: z = id % DEPTH_OUT, w = id / DEPTH_OUT.
  lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
                              : get_global_id(2) % DEPTH_OUT;
  lup_id[3] = (NUM_DIMS == 4)
                ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
                : get_global_id(2) / DEPTH_OUT;

  // NOTE(review): CONVERT_TO_TENSOR4D_STRUCT_NO_STEP is defined outside this file. If it already
  // folds input_offset_first_element_in_bytes into in.ptr, the offset is applied twice here -
  // confirm against the helper definition before changing this.
  in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x +
            lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w;

  // Copy one VEC_SIZE-wide vector from the looked-up input position to the output position.
  VSTORE(VEC_SIZE)
  (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0,
   (__global DATA_TYPE *)out.ptr);
}
#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)