1 /*******************************************************************************
2 * Copyright 2018 Intel Corporation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
17 #ifndef CPU_NSPC_BATCH_NORMALIZATION_HPP
18 #define CPU_NSPC_BATCH_NORMALIZATION_HPP
22 #include "c_types_map.hpp"
23 #include "cpu_batch_normalization_pd.hpp"
24 #include "cpu_engine.hpp"
25 #include "type_helpers.hpp"
// Forward batch-normalization primitive declared on top of cpu_primitive_t.
// NOTE(review): this listing skips line numbers, so some original lines are
// not visible here (e.g. the beginning of the condition chain in init(), the
// guard in front of `return status::unimplemented`, and closing braces).
// Confirm details against the complete header before relying on them.
32 struct nspc_batch_normalization_fwd_t : public cpu_primitive_t {
// Primitive descriptor for the forward pass.
33 struct pd_t : public cpu_batch_normalization_fwd_pd_t {
// Forwards every argument unchanged to the base descriptor constructor.
34 pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
35 const primitive_attr_t *attr,
36 const batch_normalization_fwd_pd_t *hint_fwd_pd)
37 : cpu_batch_normalization_fwd_pd_t(
38 engine, adesc, attr, hint_fwd_pd) {}
// Registers this descriptor under the name "nspc_bnorm:any" for the
// enclosing primitive type.
40 DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_fwd_t);
// Checks the requirements listed below and sets up the workspace and
// statistics memory descriptors. Returns status::success at the end;
// status::unimplemented is presumably returned when any requirement in
// the chain fails (the guarding `if` is not visible in this listing).
42 virtual status_t init() override {
43 using namespace prop_kind;
44 using namespace data_type;
45 assert(engine()->kind() == engine_kind::cpu);
// Requirement chain. NOTE(review): the line that opens this expression
// is missing from the listing.
47 /* the algorithm requires barriers while switching
48 * between parallelization over N and C dimensions */
49 && mkldnn_thr_syncable()
// No zero-sized memory, and the data must be f32.
51 && !has_zero_dim_memory()
52 && desc()->data_desc.data_type == f32
// If scale/shift is used its data type must be f32 as well (assuming
// utils::implication(a, b) means "a implies b" -- TODO confirm).
53 && utils::implication(use_scaleshift(),
54 desc()->data_scaleshift_desc.data_type == f32)
// Only the nhwc memory format is listed as acceptable.
55 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
// Attributes must be default, or carry a ReLU post-op.
56 && (attr()->has_default_values() || this->with_relu_post_op());
58 return status::unimplemented;
// A workspace is initialized only for training with fused BN+ReLU.
// NOTE(review): the meaning of the literal 8 is not shown here --
// presumably a per-element size for the workspace; verify against
// bn_init_default_ws.
60 if (is_training() && fuse_bn_relu())
61 bn_init_default_ws(this, this->workspace_pd_, 8);
// When statistics are an input or the primitive is in training mode,
// mean and variance share one 1-D f32 descriptor with C() elements.
63 if (stats_is_src() || is_training()) {
64 memory_desc_t stats_d;
65 dims_t stats_dims = { C() };
// NOTE(review): the trailing argument(s) of this call are missing from
// the listing.
66 mkldnn_memory_desc_init(&stats_d, 1, stats_dims, data_type::f32,
68 mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
69 variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
72 return status::success;
// Element type used by this primitive: the C++ type mapped to f32.
76 typedef typename prec_traits<data_type::f32>::type data_t;
// Constructor and destructor are only declared here; their definitions
// are not part of this header excerpt.
78 nspc_batch_normalization_fwd_t(const pd_t *pd, const input_vector &inputs,
79 const output_vector &outputs);
80 ~nspc_batch_normalization_fwd_t();
// Marks the event as ready. NOTE(review): the statement preceding
// set_state is missing from the listing -- presumably the call to
// execute_forward(); confirm.
81 virtual void execute(event_t *e) {
83 e->set_state(event_t::ready);
// Raw data_t pointers; their allocation, ownership and use are not visible
// in this excerpt.
87 data_t *stats_reduction_;
88 data_t *tmp_mean_, *tmp_variance_;
// Forward computation routine; declared only, defined elsewhere.
89 void execute_forward();
// Backward batch-normalization primitive declared on top of cpu_primitive_t.
// NOTE(review): this listing skips line numbers, so some original lines are
// not visible here (e.g. the beginning of the condition chain in init(), the
// guards in front of both `return status::unimplemented` statements, and
// closing braces). Confirm details against the complete header.
93 struct nspc_batch_normalization_bwd_t : public cpu_primitive_t {
// Primitive descriptor for the backward pass.
94 struct pd_t : public cpu_batch_normalization_bwd_pd_t {
// Forwards every argument unchanged to the base descriptor constructor;
// the hint is a forward primitive descriptor.
95 pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
96 const primitive_attr_t *attr,
97 const batch_normalization_fwd_pd_t *hint_fwd_pd)
98 : cpu_batch_normalization_bwd_pd_t(
99 engine, adesc, attr, hint_fwd_pd) {}
// Registers this descriptor under the name "nspc_bnorm:any" for the
// enclosing primitive type.
101 DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_bwd_t);
// Checks the requirements listed below and, for fused BN+ReLU, validates
// the workspace against the forward hint. Returns status::success at the
// end; status::unimplemented is presumably returned when a check fails
// (the guarding `if` statements are not visible in this listing).
103 virtual status_t init() override {
104 using namespace prop_kind;
105 using namespace data_type;
106 assert(engine()->kind() == engine_kind::cpu);
// Requirement chain. NOTE(review): the line that opens this expression
// is missing from the listing.
108 /* the algorithm requires barriers while switching
109 * between parallelization over N and C dimensions */
110 && mkldnn_thr_syncable()
// No zero-sized memory, and the data must be f32.
112 && !has_zero_dim_memory()
113 && desc()->data_desc.data_type == f32
// If scale/shift is used its data type must be f32 as well (assuming
// utils::implication(a, b) means "a implies b" -- TODO confirm).
114 && utils::implication(use_scaleshift(),
115 desc()->data_scaleshift_desc.data_type == f32)
// Only the nhwc memory format is listed as acceptable.
116 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
// Attributes must be default, or carry a ReLU post-op.
117 && (attr()->has_default_values() || this->with_relu_post_op());
119 return status::unimplemented;
// With fused BN+ReLU a workspace descriptor is initialized and its size
// is read back into this_ws_sz.
// NOTE(review): the meaning of the literal 8 is not shown here --
// presumably a per-element size for the workspace; verify against
// bn_init_default_ws.
121 if (fuse_bn_relu()) {
122 bn_init_default_ws(this, this->workspace_pd_, 8);
123 const size_t this_ws_sz
124 = memory_desc_wrapper(this->workspace_pd()).size();
// The forward hint must provide a workspace descriptor.
// NOTE(review): the rest of this expression is cut off in the listing;
// presumably it compares the hint's workspace size with this_ws_sz.
126 bool ws_ok = true && hint_fwd_pd_->workspace_pd()
127 && memory_desc_wrapper(hint_fwd_pd_->workspace_pd())
131 return status::unimplemented;
134 return status::success;
// Element type used by this primitive: the C++ type mapped to f32.
138 typedef typename prec_traits<data_type::f32>::type data_t;
// Constructor and destructor are only declared here; their definitions
// are not part of this header excerpt.
140 nspc_batch_normalization_bwd_t(const pd_t *pd, const input_vector &inputs,
141 const output_vector &outputs);
142 ~nspc_batch_normalization_bwd_t();
// Marks the event as ready. NOTE(review): the statement preceding
// set_state is missing from the listing -- presumably the call to
// execute_backward(); confirm.
143 virtual void execute(event_t *e) {
145 e->set_state(event_t::ready);
// Raw data_t pointers; their allocation, ownership and use are not visible
// in this excerpt.
149 data_t *stats_reduction_;
150 data_t *tmp_diff_scaleshift_;
// Backward computation routine; declared only, defined elsewhere.
151 void execute_backward();
160 // vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s