1 // Copyright (c) 2016-2017 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 #include "include/include_all.cl"
// UNIT_CVT_FUNC(val): wraps the kernel's float `norm` factor before it is
// multiplied with UNIT_TYPE data. Two alternative expansions are listed:
//   - convert_half(val): explicit conversion of val to half;
//   - (val):             val passed through unchanged.
// NOTE(review): the embedded numbering skips the lines around these two
// defines (18, 20, 22), so they presumably sit in the branches of a
// preprocessor conditional whose directive lines are not visible here.
// Confirm against the full file before relying on either expansion.
19 #define UNIT_CVT_FUNC(val) convert_half(val)
21 #define UNIT_CVT_FUNC(val) (val)
// normalize_gpu_within_spatial_bfyx
//
// Each work-item handles one (x, y, b) position taken from global ids 0, 1
// and 2. It walks INPUT0_FEATURE_NUM elements of `input`, spaced
// INPUT0_FEATURE_PITCH apart, twice:
//   1. to accumulate the sum of their squares into `norm`;
//   2. to write each element, multiplied by the factor held in `norm`
//      (norm^-0.5 on the visible path) and by an entry of `scale_input`,
//      through ACTIVATION into `output`.
//
// Parameters:
//   input       - read-only __global UNIT_TYPE buffer, addressed with the
//                 INPUT0_OFFSET / INPUT0_*_PITCH constants.
//   output      - __global UNIT_TYPE buffer, addressed with the
//                 OUTPUT_OFFSET / OUTPUT_*_PITCH constants.
//   scale_input - read-only __global UNIT_TYPE buffer of scale factors.
//
// NOTE(review): this listing has gaps. Braces, the declaration and initial
// value of `norm`, and the lines numbered 44-50 are not visible. Comments
// below describe only what the visible lines do; confirm the missing parts
// against the full file.
25 KERNEL (normalize_gpu_within_spatial_bfyx)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output, const __global UNIT_TYPE* scale_input)
// Work-item coordinates: dimension 0 -> x, dimension 1 -> y, dimension 2 -> b.
27 const uint x = get_global_id(0);
28 const uint y = get_global_id(1);
29 const uint b = get_global_id(2);
// Index of the first element for this (b, y, x); no feature term is included,
// so the loops below advance it by INPUT0_FEATURE_PITCH themselves.
31 const uint input_first = INPUT0_OFFSET + b*INPUT0_BATCH_PITCH + y*INPUT0_Y_PITCH + x*INPUT0_X_PITCH;
34 uint input_idx = input_first;
// First pass: accumulate value*value into `norm` in float, regardless of UNIT_TYPE.
// NOTE(review): `norm` is declared on a line not visible here (embedded line 35);
// its initial value is therefore unknown from this excerpt.
36 for (int i = 0; i < INPUT0_FEATURE_NUM; i++)
38 float value = (float)input[input_idx];
// mad(a, b, c) computes a*b + c, i.e. norm += value^2.
39 norm = mad(value, value, norm);
40 input_idx += INPUT0_FEATURE_PITCH;
// Output index for the same (b, y, x), using the OUTPUT_* layout constants.
43 uint output_idx = OUTPUT_OFFSET + b*OUTPUT_BATCH_PITCH + y*OUTPUT_Y_PITCH + x*OUTPUT_X_PITCH;
// Turn the accumulated sum into norm^(-0.5), the reciprocal square root, using
// the native_ (implementation-defined precision) variant of powr.
// NOTE(review): embedded numbering jumps from 43 to 51, so this statement may be
// guarded by a condition that is not visible here; confirm.
51 norm = native_powr(norm, -0.5f);
// Second pass: restart from the first element and write the scaled results.
55 input_idx = input_first;
56 for (int f = 0; f < INPUT0_FEATURE_NUM; f++)
// Compile-time choice of which scale_input entry to use:
//   one-entry table                 -> always entry 0;
//   table at least as long as the
//   feature count                   -> entry f;
//   remaining case                  -> f wrapped modulo the table length.
// NOTE(review): the #else / #endif lines (embedded 62, 64) are not visible; the
// third declaration is presumably the #else branch; confirm.
58 #if SCALE_TABLE_FEATURE_NUM == 1
59 const uint scale_index = 0;
60 #elif INPUT0_FEATURE_NUM <= SCALE_TABLE_FEATURE_NUM
61 const uint scale_index = f;
63 const uint scale_index = f % SCALE_TABLE_FEATURE_NUM;
// result = ACTIVATION(UNIT_CVT_FUNC(norm) * input * scale, NL_M, NL_N).
// ACTIVATION, NL_M and NL_N are not defined in the visible lines; presumably
// they come from the include or from build options. TODO confirm.
66 output[output_idx] = ACTIVATION(UNIT_CVT_FUNC(norm) * input[input_idx] * scale_input[scale_index], NL_M, NL_N);
// Step both indices forward by one feature pitch in their respective layouts.
67 output_idx += OUTPUT_FEATURE_PITCH;
68 input_idx += INPUT0_FEATURE_PITCH;