// inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.hpp
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef CPU_JIT_UNI_DW_CONVOLUTION_HPP
#define CPU_JIT_UNI_DW_CONVOLUTION_HPP

#include "c_types_map.hpp"
#include "memory_tracking.hpp"

#include "cpu_barrier.hpp"
#include "cpu_convolution_pd.hpp"
#include "cpu_reducer.hpp"

#include "jit_uni_dw_conv_kernel_f32.hpp"

namespace mkldnn {
namespace impl {
namespace cpu {

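// Forward depthwise convolution primitive driven by a JIT-generated kernel,
// templated on the target ISA (sse42 / avx2 / avx512_common). Its descriptor
// (pd_t) validates the problem (f32 data, direct or auto algorithm, optional
// bias), lets the kernel fill jcp_ via init_conf(), and registers the
// scratchpad the kernel requests; the primitive itself only owns the kernel
// and forwards execute() to execute_forward().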
template <cpu_isa_t isa>
struct _jit_uni_dw_convolution_fwd_t: public cpu_primitive_t {
    struct pd_t: public cpu_convolution_fwd_pd_t {
        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                const primitive_attr_t *attr,
                const typename pd_t::base_class *hint_fwd_pd)
            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
            , jcp_() {}

        DECLARE_COMMON_PD_T(
                JIT_IMPL_NAME_HELPER("jit_dw:", isa, ""),
                _jit_uni_dw_convolution_fwd_t<isa>);

        virtual status_t init() override {
            using namespace prop_kind;
            assert(this->engine()->kind() == engine_kind::cpu);
            bool ok = true
                && this->set_default_params() == status::success
                && utils::one_of(this->desc()->prop_kind, forward_training,
                        forward_inference)
                && utils::one_of(this->desc()->alg_kind,
                           alg_kind::convolution_auto,
                           alg_kind::convolution_direct)
                && !this->has_zero_dim_memory()
                && utils::everyone_is(data_type::f32,
                        this->desc()->src_desc.data_type,
                        this->desc()->weights_desc.data_type,
                        this->desc()->dst_desc.data_type)
                && IMPLICATION(this->with_bias(),
                        data_type::f32 == this->desc()->bias_desc.data_type);

            if (!ok) return status::unimplemented;

            status_t status = jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(
                    jcp_, *this->desc(), this->src_pd_.desc(),
                    *this->weights_pd_.desc(), *this->dst_pd_.desc(),
                    *this->attr());
            if (status != status::success) return status;

            auto scratchpad = scratchpad_registry().registrar();
            jit_uni_dw_conv_fwd_kernel_f32<isa>::init_scratchpad(scratchpad,
                    jcp_);

            return status::success;
        }

        jit_conv_conf_t jcp_;

    protected:
        virtual status_t set_default_params() override {
            using namespace memory_format;
            auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
            auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;

            if (this->src_pd_.desc()->format == any)
                CHECK(this->src_pd_.set_format(desired_act_fmt));
            if (this->dst_pd_.desc()->format == any)
                CHECK(this->dst_pd_.set_format(desired_act_fmt));
            if (this->weights_pd_.desc()->format == any)
                CHECK(this->weights_pd_.set_format(desired_wei_fmt));
            if (this->bias_pd_.desc()->format == any)
                CHECK(this->bias_pd_.set_format(x));
            if (this->desc()->alg_kind == alg_kind::convolution_auto)
                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
            return status::success;
        }
    };

    _jit_uni_dw_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
        : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr)
    { kernel_ = new jit_uni_dw_conv_fwd_kernel_f32<isa>(pd()->jcp_, *pd()->attr()); }

    ~_jit_uni_dw_convolution_fwd_t() { delete kernel_; }

    typedef typename prec_traits<data_type::f32>::type data_t;

    virtual void execute(event_t *e) const {
        execute_forward();
        e->set_state(event_t::ready);
    }

private:
    void execute_forward() const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }

    jit_uni_dw_conv_fwd_kernel_f32<isa> *kernel_;
};

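// Convenience aliases for the supported ISA specializations.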
using jit_avx512_common_dw_convolution_fwd_t =
    _jit_uni_dw_convolution_fwd_t<avx512_common>;
using jit_avx2_dw_convolution_fwd_t = _jit_uni_dw_convolution_fwd_t<avx2>;
using jit_sse42_dw_convolution_fwd_t = _jit_uni_dw_convolution_fwd_t<sse42>;

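// Backward-data counterpart: computes diff_src from diff_dst and the weights.
// The descriptor mirrors the forward one, defaulting the activation/weight
// formats per ISA (nChw16c/Goihw16g on avx512_common, nChw8c/Goihw8g
// otherwise); execute() dispatches only backward_data and asserts otherwise.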
template <cpu_isa_t isa>
struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
    struct pd_t: public cpu_convolution_bwd_data_pd_t {
        pd_t(engine_t *engine,
                const convolution_desc_t *adesc,
                const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : cpu_convolution_bwd_data_pd_t(engine, adesc, attr, hint_fwd_pd)
            , jcp_()
        {}

        DECLARE_COMMON_PD_T(
                JIT_IMPL_NAME_HELPER("jit_dw:", isa, ""),
                _jit_uni_dw_convolution_bwd_data_t);

        virtual status_t init() override {
            using namespace prop_kind;

            assert(this->engine()->kind() == engine_kind::cpu);
            bool ok = true
                && this->set_default_params() == status::success
                && utils::one_of(this->desc()->prop_kind, backward,
                        backward_data)
                && utils::one_of(this->desc()->alg_kind,
                           alg_kind::convolution_auto,
                           alg_kind::convolution_direct)
                && !this->has_zero_dim_memory()
                && utils::everyone_is(data_type::f32,
                        this->desc()->diff_src_desc.data_type,
                        this->desc()->weights_desc.data_type,
                        this->desc()->diff_dst_desc.data_type);

            if (!ok) return status::unimplemented;

            status_t status =
                jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_conf(jcp_,
                        *this->desc(), *this->diff_src_pd_.desc(),
                        *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
            if (status != status::success) return status;

            auto scratchpad = scratchpad_registry().registrar();
            jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_scratchpad(
                    scratchpad, jcp_);

            return status::success;
        }

        jit_conv_conf_t jcp_;

    protected:
        virtual status_t set_default_params() override {
            using namespace memory_format;
            auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
            auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;

            if (this->diff_src_pd_.desc()->format == any)
                CHECK(this->diff_src_pd_.set_format(desired_act_fmt));
            if (this->diff_dst_pd_.desc()->format == any)
                CHECK(this->diff_dst_pd_.set_format(desired_act_fmt));
            if (this->weights_pd_.desc()->format == any)
                CHECK(this->weights_pd_.set_format(desired_wei_fmt));
            if (this->desc()->alg_kind == alg_kind::convolution_auto)
                CHECK(this->set_alg_kind(alg_kind::convolution_direct));

            return status::success;
        }
    };

    _jit_uni_dw_convolution_bwd_data_t(const pd_t *apd,
            const input_vector &inputs, const output_vector &outputs)
        : cpu_primitive_t(apd, inputs, outputs)
    { kernel_ = new jit_uni_dw_conv_bwd_data_kernel_f32<isa>(pd()->jcp_); }
    ~_jit_uni_dw_convolution_bwd_data_t() { delete kernel_; }

    typedef typename prec_traits<data_type::f32>::type data_t;

    virtual void execute(event_t *e) const {
        switch (pd()->desc()->prop_kind) {
        case prop_kind::backward_data:
            execute_backward_data();
            break;
        default:
            assert(!"invalid prop_kind");
        }
        e->set_state(event_t::ready);
    }

private:
    void execute_backward_data() const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }

    jit_uni_dw_conv_bwd_data_kernel_f32<isa> *kernel_;
};

using jit_avx512_common_dw_convolution_bwd_data_t =
    _jit_uni_dw_convolution_bwd_data_t<avx512_common>;
using jit_avx2_dw_convolution_bwd_data_t =
    _jit_uni_dw_convolution_bwd_data_t<avx2>;
using jit_sse42_dw_convolution_bwd_data_t =
    _jit_uni_dw_convolution_bwd_data_t<sse42>;

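// Backward-weights primitive: computes diff_weights (and diff_bias when the
// descriptor has one) from src and diff_dst. Besides the JIT kernel it owns a
// cpu_accumulator_1d_t for accumulating per-thread partial results; the
// thread count handed to init_conf() drops to 1 when the primitive is built
// from inside an existing parallel region (mkldnn_in_parallel()).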
template <cpu_isa_t isa>
struct _jit_uni_dw_convolution_bwd_weights_t: public cpu_primitive_t {
    struct pd_t: public cpu_convolution_bwd_weights_pd_t {
        pd_t(engine_t *engine,
                const convolution_desc_t *adesc,
                const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : cpu_convolution_bwd_weights_pd_t(engine, adesc, attr, hint_fwd_pd)
            , jcp_() {}

        DECLARE_COMMON_PD_T(
                JIT_IMPL_NAME_HELPER("jit_dw:", isa, ""),
                _jit_uni_dw_convolution_bwd_weights_t<isa>);

        virtual status_t init() override {
            using namespace prop_kind;

            assert(this->engine()->kind() == engine_kind::cpu);
            bool ok = true
                && this->set_default_params() == status::success
                && this->desc()->prop_kind == prop_kind::backward_weights
                && utils::one_of(this->desc()->alg_kind,
                           alg_kind::convolution_auto,
                           alg_kind::convolution_direct)
                && utils::everyone_is(data_type::f32,
                        this->desc()->src_desc.data_type,
                        this->desc()->diff_weights_desc.data_type,
                        this->desc()->diff_dst_desc.data_type);

            if (!ok) return status::unimplemented;

            const int max_threads = mkldnn_in_parallel()
                ? 1 : mkldnn_get_max_threads();

            status_t status =
                jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(jcp_,
                        *this->desc(), *this->src_pd_.desc(),
                        *this->diff_weights_pd_.desc(),
                        *this->diff_dst_pd_.desc(), max_threads);
            if (status != status::success) return status;

            auto scratchpad = scratchpad_registry().registrar();
            jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_scratchpad(
                    scratchpad, jcp_);

            return status::success;
        }

        jit_conv_conf_t jcp_;

    protected:
        virtual status_t set_default_params() override {
            using namespace memory_format;
            auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
            auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;

            if (this->src_pd_.desc()->format == any)
                CHECK(this->src_pd_.set_format(desired_act_fmt));
            if (this->diff_dst_pd_.desc()->format == any)
                CHECK(this->diff_dst_pd_.set_format(desired_act_fmt));
            if (this->diff_weights_pd_.desc()->format == any)
                CHECK(this->diff_weights_pd_.set_format(desired_wei_fmt));
            if (this->diff_bias_pd_.desc()->format == any)
                CHECK(this->diff_bias_pd_.set_format(x));
            if (this->desc()->alg_kind == alg_kind::convolution_auto)
                CHECK(this->set_alg_kind(alg_kind::convolution_direct));

            return status::success;
        }
    };

    _jit_uni_dw_convolution_bwd_weights_t(const pd_t *apd,
            const input_vector &inputs, const output_vector &outputs);

    ~_jit_uni_dw_convolution_bwd_weights_t() {
        delete kernel_;
        delete acc_ker_;
    }

    typedef typename prec_traits<data_type::f32>::type data_t;

    virtual void execute(event_t *e) const {
        execute_backward_weights();
        e->set_state(event_t::ready);
    }

private:
    void execute_backward_weights() const;
    bool do_parallel_reduction() const { return false; }
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }

    jit_uni_dw_conv_bwd_weights_kernel_f32<isa> *kernel_;
    cpu_accumulator_1d_t<data_type::f32> *acc_ker_;
};

using jit_avx512_common_dw_convolution_bwd_weights_t =
    _jit_uni_dw_convolution_bwd_weights_t<avx512_common>;
using jit_avx2_dw_convolution_bwd_weights_t =
    _jit_uni_dw_convolution_bwd_weights_t<avx2>;
using jit_sse42_dw_convolution_bwd_weights_t =
    _jit_uni_dw_convolution_bwd_weights_t<sse42>;

}
}
}

#endif