1 // Copyright (c) 2016-2017 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/include_all.cl"
17 #include "include/reshape_dims.cl"
19 // Required JIT constants:
20 // - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
21 // - FP16_UNIT_USED - [0/1] Value indicating that current kernel should use FP16.
22 // - UNIT_TYPE - Type of unit of input/output/weights/biases.
23 // - UNIT_VAL_ZERO - Literal of current UNIT_TYPE that represents 0.
// - INPUT0_BATCH_NUM - [int] Number of elements from single spatial and single feature that are grouped in single batch in input.
25 // - INPUT_ELEMENTS_COUNT - [int] Cumulative number of elements from input that are processed in single batch.
26 // - FILTER_OFM_NUM - [int] Cumulative number of elements that are outputted in single batch.
27 // - RELU - [0/1] Indicates that ReLU activation function should be used on output.
28 // - NEGATIVE_SLOPE - [float] Factor for negative output values (required when ReLU is specified).
// Fully-connected (inner-product) layer, yxfn layout.
// One work-item computes a single (batch, output-neuron) pair:
//   global id x  ->  batch_id = x % INPUT0_BATCH_NUM, neuronIdx = x / INPUT0_BATCH_NUM
// The result is the dot product of the whole input tensor slice for batch_id
// with the weight slice of output feature neuronIdx, plus an optional bias,
// passed through the JIT-selected activation.
KERNEL (fully_connected_gpu_yxfn)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output,
    const __global FILTER_TYPE* weights
#if BIAS_TERM
    , const __global BIAS_TYPE* biases
#endif
    )
{
    const uint x = get_global_id(0);
    const uint batch_id = x % INPUT0_BATCH_NUM;   // batch element handled by this work-item
    const uint neuronIdx = x / INPUT0_BATCH_NUM;  // output feature (neuron) handled by this work-item

    UNIT_TYPE result = UNIT_VAL_ZERO;

    // Base offset of this neuron's weight slice (one OFM worth of weights).
    uint weight_offset = neuronIdx * FILTER_OFM_PITCH;

    // Accumulate the dot product over every input feature and spatial position.
    for (uint k = 0; k < INPUT0_FEATURE_NUM; k++)
    {
        for (uint j = 0; j < INPUT0_SIZE_Y; j++)
        {
            for (uint i = 0; i < INPUT0_SIZE_X; i++)
            {
                // reshape_dims remaps the input (b,f,y,x) coordinate into the
                // filter's dimension order; widx[1..3] are presumably the
                // (ifm, y, x) indices in filter space — see include/reshape_dims.cl.
                uint4 widx = FUNC(reshape_dims)(batch_id, k,j,i, INPUT0_SIZE_Y, INPUT0_SIZE_X, FILTER_SIZE_Y, FILTER_SIZE_X, INPUT0_DIMS, FILTER_DIMS);
                uint weight_idx = weight_offset + widx[1]*FILTER_IFM_PITCH + widx[2]*FILTER_Y_PITCH + widx[3]*FILTER_X_PITCH;
                uint input_idx = INPUT0_OFFSET + k*INPUT0_FEATURE_PITCH + j*INPUT0_Y_PITCH + i*INPUT0_X_PITCH + batch_id*INPUT0_BATCH_PITCH;
                result += input[input_idx] * weights[weight_idx];
            }
        }
    }

    const uint output_idx = OUTPUT_OFFSET + batch_id*OUTPUT_BATCH_PITCH + neuronIdx*OUTPUT_FEATURE_PITCH;

#if BIAS_TERM
    // Per-output-feature bias, compiled in only when the layer has biases.
    result += biases[neuronIdx];
#endif

    // ACTIVATION applies the JIT-configured activation (e.g. ReLU with
    // NEGATIVE_SLOPE) or the identity when none is requested.
    output[output_idx] = ACTIVATION(result, NL_M, NL_N);
}