/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef CPU_NCSP_BATCH_NORMALIZATION_HPP
#define CPU_NCSP_BATCH_NORMALIZATION_HPP

#include <assert.h>

#include "c_types_map.hpp"
#include "memory_tracking.hpp"
#include "type_helpers.hpp"
#include "utils.hpp"

#include "cpu_batch_normalization_pd.hpp"

namespace mkldnn {
namespace impl {
namespace cpu {

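/* Forward batch normalization for plain "ncsp" layouts (nchw, ncdhw, nc).
 * Statistics (mean/variance) and scale/shift are kept in f32; bf16 data
 * additionally requires avx512_core (see pd_t::init() below). */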
template <data_type_t data_type>
struct ncsp_batch_normalization_fwd_t : public cpu_primitive_t {
    struct pd_t : public cpu_batch_normalization_fwd_pd_t {
        pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
                const primitive_attr_t *attr,
                const batch_normalization_fwd_pd_t *hint_fwd_pd)
            : cpu_batch_normalization_fwd_pd_t(
                      engine, adesc, attr, hint_fwd_pd) {}

        DECLARE_COMMON_PD_T("ncsp_bnorm:any", ncsp_batch_normalization_fwd_t);

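        /* Accepts only forward prop kinds on plain nchw/ncdhw/nc data of
         * data_type (f32 stats and scale/shift, avx512_core for bf16), with
         * default attributes or a fused ReLU post-op. Also sets up the ReLU
         * workspace when training with fused ReLU and creates f32
         * mean/variance descriptors when statistics are read or produced. */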
        virtual status_t init() override {
            using namespace data_type;
            using namespace prop_kind;
            assert(engine()->kind() == engine_kind::cpu);

            bool ok = true
                && is_fwd()
                && !has_zero_dim_memory()
                && utils::one_of(desc()->prop_kind, forward_training,
                        forward_inference)
                && desc()->data_desc.data_type == data_type
                && IMPLICATION(use_scaleshift(),
                        desc()->data_scaleshift_desc.data_type == f32)
                && utils::everyone_is(f32,
                        desc()->mean_desc.data_type,
                        desc()->variance_desc.data_type)
                && utils::one_of(data_pd_.desc()->format, memory_format::nchw,
                        memory_format::ncdhw, memory_format::nc)
                && IMPLICATION(data_type == bf16, mayiuse(avx512_core))
                && (attr()->has_default_values() || this->with_relu_post_op());
            if (!ok) return status::unimplemented;

            if (is_training() && fuse_bn_relu())
                bn_init_default_ws(this, this->workspace_pd_, 8);

            if (stats_is_src() || is_training()) {
                memory_desc_t stats_d;
                dims_t stats_dims = { C() };
                mkldnn_memory_desc_init(
                        &stats_d, 1, stats_dims, f32, memory_format::x);
                mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
                variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
            }

            init_scratchpad();

            return status::success;
        }

    private:
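        /* Books a per-thread reduction buffer when statistics are computed
         * here, temporary mean/variance buffers for inference without user
         * statistics, and bf16 conversion buffers for bfloat16 data. */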
        void init_scratchpad() {
            using namespace memory_tracking::names;
            auto scratchpad = scratchpad_registry().registrar();
            if (!stats_is_src()) {
                scratchpad.book(key_bnorm_reduction,
                        sizeof(acc_data_t) * C() * mkldnn_get_max_threads());

                if (!is_training()) {
                    scratchpad.book(key_bnorm_tmp_mean,
                            sizeof(acc_data_t) * C());
                    scratchpad.book(key_bnorm_tmp_var,
                            sizeof(acc_data_t) * C());
                }
            }

            if (data_type == data_type::bf16) {
                const int simd_w = 16;
                const bool has_spatial = utils::one_of(ndims(), 4, 5);
                const int SP = has_spatial ? D() * H() * W() : 1;
                const int nbufs = 2;
                const size_t bf16cvt_buf_sz = sizeof(acc_data_t) * nbufs
                    * mkldnn_get_max_threads() * utils::rnd_up(SP, simd_w);
                scratchpad.book(key_bnorm_bf16cvt, bf16cvt_buf_sz);
            }
        }
    };

    typedef typename prec_traits<data_type>::type data_t;
    typedef float acc_data_t;

    ncsp_batch_normalization_fwd_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
        : cpu_primitive_t(apd, inputs, outputs) {}

    ~ncsp_batch_normalization_fwd_t() {}

    virtual void execute(event_t *e) const {
        execute_forward();
        e->set_state(event_t::ready);
    }

private:
    void execute_forward() const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
};

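/* Backward batch normalization for the same plain (ncsp) layouts. A forward
 * hint primitive descriptor is required; with fused ReLU the forward
 * workspace size must match the one created here. */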
template <data_type_t data_type>
struct ncsp_batch_normalization_bwd_t : public cpu_primitive_t {
    struct pd_t : public cpu_batch_normalization_bwd_pd_t {
        pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
                const primitive_attr_t *attr,
                const batch_normalization_fwd_pd_t *hint_fwd_pd)
            : cpu_batch_normalization_bwd_pd_t(
                    engine, adesc, attr, hint_fwd_pd) {}

        DECLARE_COMMON_PD_T("ncsp_bnorm:any", ncsp_batch_normalization_bwd_t);

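        /* Mirrors the forward checks for the backward prop kinds (diff data
         * in data_type, f32 stats and scale/shift, avx512_core for bf16) and
         * validates the hint forward pd, including the fused-ReLU workspace
         * size. */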
        virtual status_t init() override {
            using namespace data_type;
            using namespace prop_kind;
            assert(engine()->kind() == engine_kind::cpu);

            bool ok = true
                && is_bwd()
                && !has_zero_dim_memory()
                && utils::one_of(desc()->prop_kind, backward, backward_data)
                && utils::everyone_is(data_type, desc()->data_desc.data_type,
                        desc()->diff_data_desc.data_type)
                && utils::everyone_is(f32, desc()->mean_desc.data_type,
                        desc()->variance_desc.data_type)
                && IMPLICATION(use_scaleshift(),
                        desc()->diff_data_scaleshift_desc.data_type == f32
                        && desc()->data_scaleshift_desc.data_type == f32)
                && IMPLICATION(data_type == bf16, mayiuse(avx512_core))
                && utils::one_of(data_pd_.desc()->format, memory_format::nchw,
                        memory_format::ncdhw, memory_format::nc)
                && attr()->has_default_values()
                && hint_fwd_pd_ != nullptr;
            if (!ok) return status::unimplemented;

            if (fuse_bn_relu()) {
                bn_init_default_ws(this, this->workspace_pd_, 8);
                const size_t this_ws_sz
                    = memory_desc_wrapper(this->workspace_pd()).size();

                bool ws_ok = true
                    && hint_fwd_pd_->workspace_pd()
                    && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
                    == this_ws_sz;
                if (!ws_ok) return status::unimplemented;
            }

            init_scratchpad();

            return status::success;
        }

    private:
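        /* Books a per-thread buffer for the mean/variance reductions, a
         * temporary diff scale/shift buffer when the user does not request
         * one, and bf16 conversion buffers for bfloat16 data. */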
        void init_scratchpad() {
            using namespace memory_tracking::names;
            auto scratchpad = scratchpad_registry().registrar();
            scratchpad.book(key_bnorm_reduction,
                    sizeof(acc_data_t) * 2 * C() * mkldnn_get_max_threads());
            if (!(use_scaleshift()
                        && desc()->prop_kind == prop_kind::backward))
                scratchpad.book(key_bnorm_tmp_diff_ss,
                        sizeof(acc_data_t) * 2 * C());

            if (data_type == data_type::bf16) {
                const int simd_w = 16;
                const bool has_spatial = utils::one_of(ndims(), 4, 5);
                const int SP = has_spatial ? D() * H() * W() : 1;
                const int nbufs = 2 + !use_global_stats();
                const size_t bf16cvt_buf_sz = sizeof(acc_data_t) * nbufs
                    * mkldnn_get_max_threads() * utils::rnd_up(SP, simd_w);
                scratchpad.book(key_bnorm_bf16cvt, bf16cvt_buf_sz);
            }
        }
    };

    typedef typename prec_traits<data_type>::type data_t;
    typedef float acc_data_t;

    ncsp_batch_normalization_bwd_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
        : cpu_primitive_t(apd, inputs, outputs) {}

    ~ncsp_batch_normalization_bwd_t() {}

    virtual void execute(event_t *e) const {
        execute_backward();
        e->set_state(event_t::ready);
    }

private:
    void execute_backward() const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
};

}
}
}

#endif

// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s