inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.hpp

   1 /*******************************************************************************
   2 * Copyright 2018 Intel Corporation
   3 *
   4 * Licensed under the Apache License, Version 2.0 (the "License");
   5 * you may not use this file except in compliance with the License.
   6 * You may obtain a copy of the License at
   7 *
   8 *     http://www.apache.org/licenses/LICENSE-2.0
   9 *
  10 * Unless required by applicable law or agreed to in writing, software
  11 * distributed under the License is distributed on an "AS IS" BASIS,
  12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 * See the License for the specific language governing permissions and
  14 * limitations under the License.
  15 *******************************************************************************/
  16
  17 #ifndef CPU_NSPC_BATCH_NORMALIZATION_HPP
  18 #define CPU_NSPC_BATCH_NORMALIZATION_HPP
  19
  20 #include <assert.h>
  21
  22 #include "c_types_map.hpp"
  23 #include "memory_tracking.hpp"
  24 #include "type_helpers.hpp"
  25 #include "utils.hpp"
  26
  27 #include "cpu_batch_normalization_pd.hpp"
  28
  29 namespace mkldnn {
  30 namespace impl {
  31 namespace cpu {
  32
  33 struct nspc_batch_normalization_fwd_t : public cpu_primitive_t {
  34     struct pd_t : public cpu_batch_normalization_fwd_pd_t {
  35         pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
  36                 const primitive_attr_t *attr,
  37                 const batch_normalization_fwd_pd_t *hint_fwd_pd)
  38             : cpu_batch_normalization_fwd_pd_t(
  39                       engine, adesc, attr, hint_fwd_pd) {}
  40
  41         DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_fwd_t);
  42
  43         virtual status_t init() override {
  44             using namespace data_type;
  45             using namespace prop_kind;
  46
  47             assert(engine()->kind() == engine_kind::cpu);
  48
  49             bool ok = true
  50                 /* the algorithm requires barriers while switching
  51                  * between parallelization over N and C dimensions */
  52                 && mkldnn_thr_syncable()
  53                 && is_fwd()
  54                 && !has_zero_dim_memory()
  55                 && desc()->data_desc.data_type == f32
  56                 && IMPLICATION(use_scaleshift(),
  57                         desc()->data_scaleshift_desc.data_type == f32)
  58                 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
  59                 && (attr()->has_default_values() || this->with_relu_post_op());
  60             if (!ok) return status::unimplemented;
  61
  62             if (is_training() && fuse_bn_relu())
  63                 bn_init_default_ws(this, this->workspace_pd_, 8);
  64
  65             if (stats_is_src() || is_training()) {
  66                 memory_desc_t stats_d;
  67                 dims_t stats_dims = { C() };
  68                 mkldnn_memory_desc_init(&stats_d, 1, stats_dims,
  69                         data_type::f32, memory_format::x);
  70                 mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
  71                 variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
  72             }
  73
  74             init_scratchpad();
  75
  76             return status::success;
  77         }
  78
  79     private:
  80         void init_scratchpad() {
  81             using namespace memory_tracking::names;
  82             auto scratchpad = scratchpad_registry().registrar();
  83             if (!stats_is_src()) {
  84                 int sz = nstl::max(C(), 16) * mkldnn_get_max_threads();
  85                 scratchpad.book(key_bnorm_reduction, sizeof(data_t) * sz);
  86                 scratchpad.book(key_bnorm_tmp_mean, sizeof(data_t) * sz);
  87                 scratchpad.book(key_bnorm_tmp_var, sizeof(data_t) * sz);
  88             }
  89         }
  90     };
  91
  92     typedef typename prec_traits<data_type::f32>::type data_t;
  93
  94     nspc_batch_normalization_fwd_t(const pd_t *apd, const input_vector &inputs,
  95             const output_vector &outputs)
  96         : cpu_primitive_t(apd, inputs, outputs) {}
  97     ~nspc_batch_normalization_fwd_t() {}
  98
  99     virtual void execute(event_t *e) const {
 100         execute_forward();
 101         e->set_state(event_t::ready);
 102     }
 103
 104 private:
 105     void execute_forward() const;
 106     const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 107 };
 108
 109 struct nspc_batch_normalization_bwd_t : public cpu_primitive_t {
 110     struct pd_t : public cpu_batch_normalization_bwd_pd_t {
 111         pd_t(engine_t *engine, const batch_normalization_desc_t *adesc,
 112                 const primitive_attr_t *attr,
 113                 const batch_normalization_fwd_pd_t *hint_fwd_pd)
 114             : cpu_batch_normalization_bwd_pd_t(
 115                       engine, adesc, attr, hint_fwd_pd) {}
 116
 117         DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_bwd_t);
 118
 119         virtual status_t init() override {
 120             using namespace data_type;
 121             using namespace prop_kind;
 122
 123             assert(engine()->kind() == engine_kind::cpu);
 124
 125             bool ok = true
 126                 /* the algorithm requires barriers while switching
 127                  * between parallelization over N and C dimensions */
 128                 && mkldnn_thr_syncable()
 129                 && is_bwd()
 130                 && !has_zero_dim_memory()
 131                 && desc()->data_desc.data_type == f32
 132                 && IMPLICATION(use_scaleshift(),
 133                         desc()->data_scaleshift_desc.data_type == f32)
 134                 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
 135                 && (attr()->has_default_values() || this->with_relu_post_op());
 136             if (!ok) return status::unimplemented;
 137
 138             if (fuse_bn_relu()) {
 139                 bn_init_default_ws(this, this->workspace_pd_, 8);
 140                 const size_t this_ws_sz
 141                     = memory_desc_wrapper(this->workspace_pd()).size();
 142
 143                 bool ws_ok = true
 144                     && hint_fwd_pd_->workspace_pd()
 145                     && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
 146                     == this_ws_sz;
 147                 if (!ws_ok) return status::unimplemented;
 148             }
 149
 150             init_scratchpad();
 151
 152             return status::success;
 153         }
 154
 155     private:
 156         void init_scratchpad() {
 157             using namespace memory_tracking::names;
 158             auto scratchpad = scratchpad_registry().registrar();
 159             scratchpad.book(key_bnorm_reduction,
 160                     sizeof(data_t) * 2 * C() * mkldnn_get_max_threads());
 161             scratchpad.book(key_bnorm_tmp_diff_ss, sizeof(data_t) * 2 * C()
 162                     * (mkldnn_get_max_threads() + 1));
 163         }
 164     };
 165
 166     typedef typename prec_traits<data_type::f32>::type data_t;
 167
 168     nspc_batch_normalization_bwd_t(const pd_t *apd, const input_vector &inputs,
 169             const output_vector &outputs)
 170         : cpu_primitive_t(apd, inputs, outputs) {}
 171     ~nspc_batch_normalization_bwd_t() {}
 172
 173     virtual void execute(event_t *e) const {
 174         execute_backward();
 175         e->set_state(event_t::ready);
 176     }
 177
 178 private:
 179     void execute_backward() const;
 180     const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 181 };
 182
 183 }
 184 }
 185 }
 186
 187 #endif
 188
 189 // vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s