updated readme file due to moving CMake scripts to the root folder
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / mvn_gpu_ref_within_channels.cl
1 // Copyright (c) 2018-2019 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "include/fetch.cl"
16 #include "include/data_types.cl"
17
18
// UNIT_CVT_FUNC(val): adapts a value before it is combined with UNIT_TYPE data.
// With FP16_UNIT_USED set, the value is passed through convert_half();
// otherwise it is used unchanged.
#if !FP16_UNIT_USED
    #define UNIT_CVT_FUNC(val) (val)
#else
    #define UNIT_CVT_FUNC(val) convert_half(val)
#endif
24
25
26 KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
27 {
28     const uint b = get_global_id(0);
29     const uint f = get_global_id(1);
30     float mean = 0.f;
31
32     const uint input_first = INPUT0_OFFSET + b * INPUT0_BATCH_PITCH + f * INPUT0_FEATURE_PITCH;
33
34     // Compute mean
35     uint input_idx = input_first;
36     for (uint z = 0; z < INPUT0_SIZE_Z; z++)
37     {
38         for (uint y = 0; y < INPUT0_SIZE_Y; y++)
39         {
40             for (uint x = 0; x < INPUT0_SIZE_X; x++)
41             {
42 #if INPUT0_LAYOUT_BFZYX_F16
43                 input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
44                 mean += (float)input[input_idx];
45              }
46         }
47 #else
48                 mean += (float)input[input_idx];
49                 input_idx += INPUT0_X_PITCH;
50             }
51             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
52         }
53         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
54 #endif
55     }
56     mean /= INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z;
57
58 #if INPUT0_LAYOUT_BFZYX_F16
59     uint output_idx;
60 #else
61     uint output_idx = OUTPUT_OFFSET + b * OUTPUT_BATCH_PITCH + f * OUTPUT_FEATURE_PITCH;
62 #endif
63 #if NORMALIZE_VARIANCE == 0
64     //subtract mean
65     input_idx = input_first;
66     for (uint z = 0; z < INPUT0_SIZE_Z; z++)
67     {
68         for (uint y = 0; y < INPUT0_SIZE_Y; y++)
69         {
70             for (uint x = 0; x < INPUT0_SIZE_X; x++)
71             {
72 #if INPUT0_LAYOUT_BFZYX_F16
73                 input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
74                 output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
75                 output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
76             }
77         }
78 #else
79                 output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
80                 input_idx += INPUT0_X_PITCH;
81                 output_idx += OUTPUT_X_PITCH;
82             }
83             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
84             output_idx += OUTPUT_Y_PITCH - INPUT0_SIZE_X*OUTPUT_X_PITCH;
85         }
86         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
87         output_idx += OUTPUT_Z_PITCH - INPUT0_SIZE_Y*OUTPUT_Y_PITCH;
88 #endif
89     }
90 #else //NORMALIZE_VARIANCE
91     float variance = 0.f;
92
93     //compute variance
94     input_idx = input_first;
95     for (uint z = 0; z < INPUT0_SIZE_Z; z++)
96     {
97         for (uint y = 0; y < INPUT0_SIZE_Y; y++)
98         {
99             for (uint x = 0; x < INPUT0_SIZE_X; x++)
100             {
101 #if INPUT0_LAYOUT_BFZYX_F16
102                 input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
103                 float res = (float)input[input_idx] - mean;
104                 variance = fma(res, res, variance);
105             }
106         }
107 #else
108                 float res = (float)input[input_idx] - mean;
109                 variance = fma(res, res, variance);
110                 input_idx += INPUT0_X_PITCH;
111             }
112             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
113         }
114         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
115 #endif
116     }
117
118     //normalize variance
119     variance /= INPUT0_SIZE_Z * INPUT0_SIZE_Y * INPUT0_SIZE_X;
120     variance = native_powr(variance + (float)EPSILON, -0.5f);
121
122     input_idx = input_first;
123     for (uint z = 0; z < INPUT0_SIZE_Z; z++)
124     {
125         for (uint y = 0; y < INPUT0_SIZE_Y; y++)
126         {
127             for (uint x = 0; x < INPUT0_SIZE_X; x++)
128             {
129 #if INPUT0_LAYOUT_BFZYX_F16
130                 input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
131                 output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
132                 output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
133             }
134         }
135 #else
136                 output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
137                 input_idx += INPUT0_X_PITCH;
138                 output_idx += OUTPUT_X_PITCH;
139             }
140             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
141             output_idx += OUTPUT_Y_PITCH - INPUT0_SIZE_X*OUTPUT_X_PITCH;
142         }
143         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
144         output_idx += OUTPUT_Z_PITCH - INPUT0_SIZE_Y*OUTPUT_Y_PITCH;
145 #endif
146     }
147 #endif
148 }
149
150
151 #undef UNIT_CVT_FUNC