1 // Copyright (c) 2018 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/common.cl"
17 #include "include/data_types.cl"
// Kernel parameter list and body: each work-item accumulates a sum of
// input0[...] * input1[...] products and writes one element of `output`,
// scaled by ALPHA and offset by BETA * input2[...].
// NOTE(review): the kernel name line, the braces, the declarations of
// in0_idx / in1_idx / value / beta_out and some preprocessor guards are not
// visible in this view; comments marked "presumably" need confirming
// against the complete file.
//
// input0, input1: read-only global buffers, multiplied element-wise in the loop.
// input2:         read-only global buffer, read once at out_idx and scaled by BETA
//                 (presumably only present under a compile-time guard -- confirm).
// output:         global buffer receiving the result at out_idx.
20 (const __global UNIT_TYPE* input0,
21 const __global UNIT_TYPE* input1,
23 const __global UNIT_TYPE* input2,
25 __global UNIT_TYPE* output)
// Work-item coordinates from global dimensions 0, 1 and 2. `b` only ever
// multiplies an X*Y product below, so it appears to act as a batch offset.
27 const uint x = (uint)get_global_id(0);
28 const uint y = (uint)get_global_id(1);
29 const uint b = (uint)get_global_id(2);
// Two alternative loop headers / input0 index formulas follow. Presumably
// a guard that is not visible here (likely on TRANSPOSE_INPUT1) selects one.
// Variant A: iterate Y1 times, stepping input0 by a full X1 pitch per iteration.
35 for (uint i = 0; i < Y1; ++i)
37 in0_idx = i * X1 + x + b * X1 * Y1;
// Variant B: iterate X1 times, stepping input0 by one element per iteration.
39 for (uint i = 0; i < X1; ++i)
41 in0_idx = x * X1 + i + b * X1 * Y1;
// Two alternative input1 index formulas (presumably selected by a
// TRANSPOSE_INPUT2 guard that is not visible here -- confirm).
45 in1_idx = y * X2 + i + b * X2 * Y2;
47 in1_idx = i * X2 + y + b * X2 * Y2;
// Fused multiply-add: value = input0[in0_idx] * input1[in1_idx] + value.
50 value = fma(input0[in0_idx], input1[in1_idx], value);
// Select the output index formula from the transpose flags. The row pitch is
// Y2 when TRANSPOSE_INPUT2 is set and X2 otherwise.
// NOTE(review): the batch stride term differs per branch (X1*Y2, X1*Y1,
// X2*Y2, X2*Y1). Confirm each one equals the per-batch element count of
// `output`; only the first branch visibly pairs its stride with its row pitch.
52 #if TRANSPOSE_INPUT1 && TRANSPOSE_INPUT2
53 uint out_idx = x * Y2 + y + b * X1 * Y2;
54 #elif TRANSPOSE_INPUT1
55 uint out_idx = x * X2 + y + b * X1 * Y1;
56 #elif TRANSPOSE_INPUT2
57 uint out_idx = x * Y2 + y + b * X2 * Y2;
// Presumably the fallback (#else) branch, used when no transpose flag is set.
59 uint out_idx = x * X2 + y + b * X2 * Y1;
// Additive term taken from input2 at the same position as the output element.
64 beta_out = BETA * input2[out_idx];
// Final result: ALPHA * value + beta_out, computed with a single fma.
66 output[out_idx] = fma(ALPHA, value, beta_out);