1 // Copyright (c) 2018 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "include/common.cl"
16 #include "include/data_types.cl"
// UNIT_CVT_FUNC(val): conversion applied to the float statistics (mean and the
// variance-derived scale) before they are combined with input elements in the
// kernel below. One definition converts with convert_half(), the other passes
// the value through unchanged.
// NOTE(review): the preprocessor conditional that selects between these two
// definitions is not visible in this view - presumably a half-precision
// build switch; confirm against the full file.
20 #define UNIT_CVT_FUNC(val) convert_half(val)
22 #define UNIT_CVT_FUNC(val) (val)
// Reference mean-variance normalization over the X/Y plane of one
// (batch, feature) slice.
//
// Global id 0 is used as the batch index and global id 1 as the feature index.
// For that slice the kernel:
//   1. sums all INPUT0_SIZE_Y * INPUT0_SIZE_X elements in float and divides by
//      the element count to obtain the mean;
//   2. if NORMALIZE_VARIANCE == 0, writes ACTIVATION(input - mean) per element;
//   3. otherwise accumulates the squared deviations from the mean, divides by
//      the element count, turns the result into (variance + EPSILON)^-0.5 and
//      writes ACTIVATION((input - mean) * scale) per element.
//
// input  - source buffer, addressed through the INPUT0_* offset/pitch macros.
// output - destination buffer, addressed through the OUTPUT_* offset/pitch macros.
//
// NOTE(review): the declarations of `mean` and `variance` and the block braces
// are not visible in this view; both accumulators are presumably float
// initialised to zero - confirm against the full file.
26 KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
28 const uint b = get_global_id(0);
29 const uint f = get_global_id(1);
// Index of the first element of this (b, f) slice; every pass restarts here.
32 const uint input_first = INPUT0_OFFSET + b * INPUT0_BATCH_PITCH + f * INPUT0_FEATURE_PITCH;
// Pass 1: accumulate the sum of the slice in float.
35 uint input_idx = input_first;
36 for (uint y = 0; y < INPUT0_SIZE_Y; y++)
38 for (uint x = 0; x < INPUT0_SIZE_X; x++)
40 mean += (float)input[input_idx];
41 input_idx += INPUT0_X_PITCH;
// Undo the X advance made by the inner loop and step to the next row.
43 input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
// Sum -> arithmetic mean of the slice.
45 mean /= INPUT0_SIZE_X * INPUT0_SIZE_Y;
// Index of the first output element for this (b, f) slice.
47 uint output_idx = OUTPUT_OFFSET + b * OUTPUT_BATCH_PITCH + f * OUTPUT_FEATURE_PITCH;
49 #if NORMALIZE_VARIANCE == 0
// Mean-only mode: subtract the mean from every element, then apply ACTIVATION.
51 input_idx = input_first;
52 for (uint y = 0; y < INPUT0_SIZE_Y; y++)
54 for (uint x = 0; x < INPUT0_SIZE_X; x++)
56 output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), NL_M, NL_N);
57 input_idx += INPUT0_X_PITCH;
58 output_idx += OUTPUT_X_PITCH;
60 input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
// NOTE(review): the output row step rewinds by INPUT0_SIZE_X elements, i.e. it
// assumes the output has the same X extent as the input - confirm.
61 output_idx += OUTPUT_Y_PITCH - INPUT0_SIZE_X*OUTPUT_X_PITCH;
63 #else //NORMALIZE_VARIANCE
// Pass 2: accumulate squared deviations from the mean (fused multiply-add).
67 input_idx = input_first;
68 for (uint y = 0; y < INPUT0_SIZE_Y; y++)
70 for (uint x = 0; x < INPUT0_SIZE_X; x++)
72 float res = (float)input[input_idx] - mean;
73 variance = fma(res, res, variance);
74 input_idx += INPUT0_X_PITCH;
76 input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
// Divide by the element count, then reuse `variance` to hold the scale
// (variance + EPSILON)^-0.5, so the final pass multiplies instead of dividing.
80 variance /= INPUT0_SIZE_Y * INPUT0_SIZE_X;
81 variance = native_powr(variance + (float)EPSILON, -0.5f);
// Pass 3: write ACTIVATION((input - mean) * scale) for every element.
83 input_idx = input_first;
84 for (uint y = 0; y < INPUT0_SIZE_Y; y++)
86 for (uint x = 0; x < INPUT0_SIZE_X; x++)
88 output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), NL_M, NL_N);
89 input_idx += INPUT0_X_PITCH;
90 output_idx += OUTPUT_X_PITCH;
92 input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
// NOTE(review): same assumption as the mean-only branch - the output X extent
// is taken to equal INPUT0_SIZE_X; confirm.
93 output_idx += OUTPUT_Y_PITCH - INPUT0_SIZE_X*OUTPUT_X_PITCH;