1 /*******************************************************************************
2 * Copyright 2018 Intel Corporation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
25 #include "dnn_types.hpp"
26 #include "mkldnn_common.hpp"
27 #include "mkldnn_debug.hpp"
28 #include "mkldnn_memory.hpp"
// String defined in another translation unit.
// NOTE(review): presumably the default format template handed to
// perf_report() -- confirm at the definition.
32 extern const char *perf_template;
// RNN cell algorithms distinguished by this driver.
34 enum alg_t { VANILLA_RNN, VANILLA_LSTM, VANILLA_GRU, LBR_GRU };
// Converts a textual name to an alg_t (accepted spellings are defined with
// the implementation, not in this header).
35 alg_t str2alg(const char *str);
// Returns a textual name for an alg_t value.
36 const char *alg2str(alg_t alg);
// Maps an alg_t value to an mkldnn_alg_kind_t value.
37 mkldnn_alg_kind_t alg2kind(alg_t alg);
// Activation functions selectable for a problem.
39 enum activation_t { RELU, LOGISTIC, TANH };
// Converts a textual name to an activation_t.
40 activation_t str2activation(const char *str);
// Returns a textual name for an activation_t value (the parameter is named
// `alg` although its type is activation_t).
41 const char *activation2str(activation_t alg);
// Maps an activation_t value to an mkldnn_alg_kind_t value.
42 mkldnn_alg_kind_t activation2kind(activation_t alg);
// Conversions between text and mkldnn_prop_kind_t.
44 mkldnn_prop_kind_t str2prop(const char *str);
45 const char *prop2str(mkldnn_prop_kind_t prop);
// Conversions between text and mkldnn_rnn_direction_t.
47 mkldnn_rnn_direction_t str2direction(const char *str);
48 const char *direction2str(mkldnn_rnn_direction_t direction);
/**
 * Maps multi-dimensional indices onto a flat buffer in row-major order.
 *
 * The object is built from a base pointer and the extent of every dimension
 * (outermost first). Calling it with one index per dimension returns a
 * reference to the addressed element:
 *
 *     array_offset_calculator<float> a(buf, D0, D1, D2);
 *     a(i, j, k)  // == buf[(i * D1 + j) * D2 + k]
 *
 * The buffer is NOT owned; only the private copy of the extents is. Copying
 * the calculator duplicates the extents, so copies can be destroyed
 * independently.
 *
 * No bounds checking is performed: the caller must pass at least one index
 * and no more indices than extents were given at construction. The outermost
 * extent is stored but never needed to compute an offset.
 */
template <typename Telem>
struct array_offset_calculator {
    /**
     * @param base  first element of the (non-owned) buffer
     * @param Fargs extents of the dimensions, outermost first
     */
    template <typename... Targs>
    array_offset_calculator(Telem *base, Targs... Fargs)
        : _base_ptr(base)
        , _size(sizeof...(Fargs))
        , _dims(new int[sizeof...(Fargs)]) {
        // The trailing 0 keeps the array non-empty when no extents are
        // passed; a zero-length array is not valid C++.
        const int init_list[] = { Fargs..., 0 };
        for (int i = 0; i < _size; ++i)
            _dims[i] = init_list[i];
    }

    /** Deep-copies the extents; the buffer pointer is shared. */
    array_offset_calculator(const array_offset_calculator &other)
        : _base_ptr(other._base_ptr)
        , _size(other._size)
        , _dims(new int[other._size]) {
        for (int i = 0; i < _size; ++i)
            _dims[i] = other._dims[i];
    }

    /** Deep-copies the extents; safe against self-assignment. */
    array_offset_calculator &operator=(const array_offset_calculator &other) {
        if (this != &other) {
            // Allocate first so *this stays intact if the allocation throws.
            int *fresh = new int[other._size];
            for (int i = 0; i < other._size; ++i)
                fresh[i] = other._dims[i];
            delete[] _dims;
            _dims = fresh;
            _size = other._size;
            _base_ptr = other._base_ptr;
        }
        return *this;
    }

    ~array_offset_calculator() { delete[] _dims; }

    /**
     * @param Fargs one index per dimension, outermost first
     * @return reference to the addressed element of the buffer
     */
    template <typename... Targs>
    inline Telem &operator()(Targs... Fargs) const {
        return *(_base_ptr + _offset(1, Fargs...));
    }

    // Everything below is an implementation detail. It is left publicly
    // accessible so that existing code touching these names keeps compiling.

    /** Base case: a single remaining index is the offset itself. */
    inline int _offset(int const /* dimension */, int element) const {
        return element;
    }

    /** Base case: folds the accumulated offset with the last index. */
    inline int _offset(int const dimension, int theta, int element) const {
        return element + (_dims[dimension] * theta);
    }

    /**
     * Recursive case: `theta` is the offset accumulated over the leading
     * indices; it is scaled by the extent of `dimension`, `element` is
     * added, and the recursion continues with the remaining indices.
     */
    template <typename... Targs>
    inline int _offset(
            int const dimension, int theta, int element, Targs... Fargs) const {
        int t_prime = element + (_dims[dimension] * theta);
        return _offset(dimension + 1, t_prime, Fargs...);
    }

    Telem *_base_ptr; // non-owning pointer to the first element
    int _size;        // number of extents stored in _dims
    int *_dims;       // owned copy of the extents, outermost first
};
// Fills *desc from the textual description `str`.
// NOTE(review): the int result is presumably a status code -- confirm its
// convention at the definition.
104 int str2desc(rnn_desc_t *desc, const char *str);
// Kinds of data an RNN problem works with. The enumerators also serve as
// indices: the _dt_conf_t array type in this header is sized by
// data_kind_total, i.e. one entry per kind.
// NOTE(review): only part of the enumerator list is visible in this listing.
106 enum rnn_data_kind_t {
116 dst_diff_weights_input,
117 dst_diff_weights_states,
121 data_kind_total // should be last to provide the total number of data kinds
// Returns an upper-case name for an rnn_data_kind_t value.
// NOTE(review): this listing shows only some of the cases.
124 inline const char *rnn_data_kind2str(rnn_data_kind_t kind) {
126 case input: return "INPUT";
127 case states: return "STATES";
128 case weights_input: return "WEIGHTS_INPUT";
129 case weights_states: return "WEIGHTS_STATES";
130 case bias: return "BIAS";
131 case dst_last_layer: return "DST_LAST_LAYER";
132 case dst_last_iteration: return "DST_LAST_ITERATION";
// Fallback: the assert fires when assertions are enabled; when NDEBUG is
// defined it is a no-op and the message string is returned instead.
134 assert(!"incorrect rnn data kind");
135 return "incorrect rnn data kind";
139 /** configuration structure that controls initial data filling and error checking
141 * dt defines precision
143 * for each rnn data kind the values are filled as follows:
144 * if (rand() > f_sparsity) then:
147 * v <-- f_min + rand() * f_step % (f_max - f_min)
149 * on final check the resulting values should be in [min .. max] range, the
150 * relative difference should not exceed eps
// Configuration record for one data kind. The typedef name `_dt_conf_t` is
// an ARRAY type: data_kind_total records, one per rnn_data_kind_t value.
153 typedef struct dt_conf_t {
154 mkldnn_data_type_t dt;
155 int min, max; /* representative */
156 int f_min, f_max; /* fill range */
157 float f_mean, f_var; /* mean and variance of normally distributed data */
158 double eps; /* acceptable error */
159 } _dt_conf_t[data_kind_total];
// Predefined configuration tables (one dt_conf_t per data kind each),
// defined in another translation unit.
// NOTE(review): the name suffixes presumably encode the data types used --
// confirm the exact mapping at the definitions.
161 extern const _dt_conf_t conf_f32;
162 extern const _dt_conf_t conf_u8u8u8u8;
163 extern const _dt_conf_t conf_u8u8u8f32;
164 extern const _dt_conf_t conf_f32u8f32f32;
165 extern const _dt_conf_t conf_f32u8f32u8;
// Conversions between a configuration's textual name and a pointer to its
// dt_conf_t records.
// NOTE(review): presumably the pointer designates the first record of one of
// the tables above -- confirm.
167 const dt_conf_t *str2cfg(const char *str);
168 const char *cfg2str(const dt_conf_t *cfg);
// Scale policy selector; NONE is explicitly 0.
170 enum policy_t { NONE = 0, COMMON, PER_OC };
// Converts a textual name to a policy_t.
171 policy_t str2policy(const char *str);
// NOTE(review): this takes attr_t::scale_t::policy_t, not the policy_t enum
// declared just above -- confirm that the asymmetry with str2policy() is
// intended.
172 const char *policy2str(attr_t::scale_t::policy_t policy);
// One RNN benchmark problem: the rnn_desc_t it derives from plus the
// configuration, propagation kind, direction, activation, scale policy and
// quantization parameters. Copying is disabled (copy constructor and copy
// assignment are deleted at the end of the struct); the struct holds the raw
// wei_oc_scales buffer that is released with zfree().
// NOTE(review): parts of the constructor's initializer list, the destructor
// header and some accessor bodies are not visible in this listing.
174 struct rnn_prb_t : public rnn_desc_t {
// `mb` defaults to 0, which leaves this->mb as initialized.
175 rnn_prb_t(const rnn_desc_t desc, const dt_conf_t *cfg,
176 mkldnn_prop_kind_t prop, alg_t alg,
177 mkldnn_rnn_direction_t direction, activation_t activation,
178 const attr_t &attr, policy_t scale_policy, int mb = 0)
183 , direction(direction)
184 , activation(activation)
186 , scale_policy(scale_policy) {
// A non-zero `mb` argument overrides this->mb.
187 if (mb) this->mb = mb;
188 wei_oc_scales = NULL;
// Only the PER_OC policy allocates a buffer of dic * n_gates() floats.
// NOTE(review): the assignment target is presumably wei_oc_scales and 64
// presumably an alignment in bytes -- confirm against zmalloc().
189 if (scale_policy == PER_OC)
191 = (float *)zmalloc(sizeof(float) * dic * n_gates(), 64);
// Initializes the quantization parameters with fp_min = -1, fp_max = 1.
192 set_qparams(-1., 1.);
// Releases the scales buffer (presumably inside the destructor, whose header
// is not visible here).
196 zfree(wei_oc_scales);
// Distinguishes the two bidirectional modes (concat, sum) from every other
// direction; the values returned for each branch are not visible here.
199 int n_directions() const {
200 return (direction == mkldnn_bidirectional_concat
201 || direction == mkldnn_bidirectional_sum) ?
205 int n_weights() const { return 1; }
// Two states for VANILLA_LSTM, one for every other algorithm.
206 int n_states() const { return alg == VANILLA_LSTM ? 2 : 1; }
// Non-LSTM algorithms: 3 gates for VANILLA_GRU / LBR_GRU, 1 otherwise. The
// value used for VANILLA_LSTM is not visible in this listing.
207 int n_gates() const {
208 return alg == VANILLA_LSTM ?
210 (alg == VANILLA_GRU || alg == LBR_GRU ? 3 : 1);
// LBR_GRU gets one entry more than n_gates(). NOTE(review): presumably the
// body of n_bias(), which the offset helpers in this header call.
213 return alg == LBR_GRU ? n_gates() + 1 : n_gates();
216 const dt_conf_t *cfg;
217 mkldnn_prop_kind_t prop;
219 mkldnn_rnn_direction_t direction;
220 activation_t activation;
222 policy_t scale_policy;
224 float data_scale, data_shift;
// NULL unless scale_policy == PER_OC (see the constructor).
226 float *wei_oc_scales;
// Defined out of line; called by the constructor with (-1., 1.).
229 void set_qparams(float fp_min, float fp_max);
// Non-copyable.
230 rnn_prb_t(const rnn_prb_t &) = delete;
231 rnn_prb_t &operator=(const rnn_prb_t &) = delete;
// NOTE(review): presumably the capacity callers must provide for the
// `buffer` argument of prb2str() -- confirm at the definition.
234 const size_t max_prb_len = 392;
// Writes a textual form of problem `p` (with `res` available) into `buffer`.
235 void prb2str(const rnn_prb_t *p, const res_t *res, char *buffer);
// Forward-pass routine declared here and implemented elsewhere (by its name,
// the reference implementation results are compared against).
// All dnn_mem_t arguments are non-const references, so the signature alone
// does not tell inputs from outputs; the dst_* ones are presumably written.
237 void compute_ref_fwd(const rnn_prb_t *p, dnn_mem_t &input_m,
238 dnn_mem_t &states_m, dnn_mem_t &weights_input_m,
239 dnn_mem_t &weights_states_m, dnn_mem_t &bias_m,
240 dnn_mem_t &dst_last_layer_m, dnn_mem_t &dst_last_iteration_m,
241 mkldnn_rnn_direction_t direction);
// Backward-pass counterpart of compute_ref_fwd(), implemented elsewhere.
// Besides the forward arguments it takes the incoming diffs
// (diff_last_layer_m, diff_last_iteration_m) and the dst_diff_* memories.
// All dnn_mem_t arguments are non-const references; the dst_* ones are
// presumably the outputs -- confirm at the definition.
243 void compute_ref_bwd(const rnn_prb_t *p, dnn_mem_t &input_m,
244 dnn_mem_t &states_m, dnn_mem_t &diff_last_layer_m,
245 dnn_mem_t &diff_last_iteration_m, dnn_mem_t &weights_input_m,
246 dnn_mem_t &weights_states_m, dnn_mem_t &bias_m,
247 dnn_mem_t &dst_last_layer_m, dnn_mem_t &dst_last_iteration_m,
248 dnn_mem_t &dst_diff_input_m, dnn_mem_t &dst_diff_states_m,
249 dnn_mem_t &dst_diff_weights_input_m,
250 dnn_mem_t &dst_diff_weights_states_m, dnn_mem_t &dst_diff_bias_m,
251 mkldnn_rnn_direction_t direction);
// Linear offset of element (n, t, c) in a dense array whose two innermost
// extents are p->n_iter and p->slc: (n * n_iter + t) * slc + c.
// The cast on `n` makes the whole expression evaluate in size_t.
254 inline size_t ntc_off_f(const rnn_prb_t *p, int n, int t, int c) {
255 return ((size_t)n * p->n_iter + t) * p->slc + c;
// Takes a linear offset and reports n, t and c through the reference
// parameters.
// NOTE(review): by its name the inverse of ntc_off_f(); the body is not
// visible in this listing.
258 inline void inv_ntc_off_f(
259 const rnn_prb_t *p, size_t off, int &n, int &t, int &c) {
// Linear offset for the index tuple (l, d, s, n, c), evaluated in size_t.
// The visible part nests l, d and s over p->n_directions() and
// p->n_states() and then scales by p->mb; the rest of the expression is not
// visible in this listing.
270 inline size_t ldsnc_off_f(
271 const rnn_prb_t *p, int l, int d, int s, int n, int c) {
272 return ((((size_t)l * p->n_directions() + d) * p->n_states() + s) * p->mb
// Decomposes a linear offset into (l, d, s, n, c), written through the
// reference parameters. Each index is peeled off with a modulo by its extent
// followed by a division of `off` by the same extent.
// NOTE(review): the steps that extract n and c are not visible here.
278 inline void inv_ldsnc_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
279 int &s, int &n, int &c) {
284 s = off % p->n_states();
285 off /= p->n_states();
286 d = off % p->n_directions();
287 off /= p->n_directions();
288 l = off % p->n_layer;
// Linear offset for the index tuple (l, d, w, ic, oc), evaluated in size_t.
// The visible part nests l, d and w over p->n_directions() and
// p->n_weights(); the factors applied for ic and oc are not visible in this
// listing.
294 inline size_t ldigo_off_f(
295 const rnn_prb_t *p, int l, int d, int w, int ic, int oc) {
296 return ((((size_t)l * p->n_directions() + d) * p->n_weights() + w)
// Decomposes a linear offset into (l, d, w, ic, oc), written through the
// reference parameters, using modulo/divide steps per index.
// NOTE(review): some steps (the oc extraction and the divisions around ic)
// are not visible in this listing.
303 inline void inv_ldigo_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
304 int &w, int &ic, int &oc) {
// NOTE(review): the literal 4 is not derived from rnn_prb_t::n_gates(), which
// yields 3 or 1 for the non-LSTM algorithms -- confirm this is intended.
307 ic = off % (4 * p->slc);
309 w = off % p->n_weights();
310 off /= p->n_weights();
311 d = off % p->n_directions();
312 off /= p->n_directions();
313 l = off % p->n_layer;
// Linear offset for the index tuple (l, d, w, oc, ic), evaluated in size_t.
// Same leading structure as ldigo_off_f() but with oc ahead of ic in the
// parameter list; the factors applied for oc and ic are not visible in this
// listing.
319 inline size_t ldwOcIc_off_f(
320 const rnn_prb_t *p, int l, int d, int w, int oc, int ic) {
321 return ((((size_t)l * p->n_directions() + d) * p->n_weights() + w)
// Decomposes a linear offset into (l, d, w, oc, ic), written through the
// reference parameters, using modulo/divide steps per index.
// NOTE(review): some steps (the ic extraction and the divisions around oc)
// are not visible in this listing.
328 inline void inv_ldwOcIc_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
329 int &w, int &oc, int &ic) {
// NOTE(review): the literal 4 is not derived from rnn_prb_t::n_gates(), which
// yields 3 or 1 for the non-LSTM algorithms -- confirm this is intended.
332 oc = off % (4 * p->sic);
334 w = off % p->n_weights();
335 off /= p->n_weights();
336 d = off % p->n_directions();
337 off /= p->n_directions();
338 l = off % p->n_layer;
// Linear offset for the index tuple (l, d, b, c), evaluated in size_t:
// l, d and b are nested over p->n_directions() and p->n_bias() and the
// result is scaled by p->sic. The continuation of the expression
// (presumably "+ c") is not visible in this listing.
344 inline size_t ldgo_off_f(const rnn_prb_t *p, int l, int d, int b, int c) {
345 return (((size_t)l * p->n_directions() + d) * p->n_bias() + b) * p->sic
// Decomposes a linear offset into (l, d, b, c), written through the
// reference parameters, using modulo/divide steps per index.
// NOTE(review): the c extraction and the division that should follow the
// b extraction are not visible in this listing.
349 inline void inv_ldgo_off_f(
350 const rnn_prb_t *p, size_t off, int &l, int &d, int &b, int &c) {
353 b = off % p->n_bias();
355 d = off % p->n_directions();
356 off /= p->n_directions();
357 l = off % p->n_layer;
362 // dst_last_layer: mkldnn_tnc
// Linear offset of element (s, t, n, c) in a dense array whose three
// innermost extents are p->n_iter, p->mb and p->sic:
// ((s * n_iter + t) * mb + n) * sic + c, evaluated in size_t.
363 inline size_t tnc_off_f(const rnn_prb_t *p, int s, int t, int n, int c) {
364 return (((size_t)s * p->n_iter + t) * p->mb + n) * p->sic + c;
// Decomposes a linear offset into (s, t, n, c), written through the
// reference parameters.
// NOTE(review): only the step that extracts s (modulo p->n_states(), then a
// division by it) is visible in this listing.
367 inline void inv_tnc_off_f(
368 const rnn_prb_t *p, size_t off, int &s, int &t, int &n, int &c) {
375 s = off % p->n_states();
376 off /= p->n_states();
// Reports performance for problem `p` with result `r`.
// NOTE(review): `pstr` is presumably the format template -- confirm.
380 void perf_report(const rnn_prb_t *p, const res_t *r, const char *pstr);
// Runs problem `p` and records the outcome in *res.
// NOTE(review): the int result is presumably a status code -- confirm.
382 int doit(const rnn_prb_t *p, res_t *res);
// Takes the descriptor by non-const pointer.
// NOTE(review): whether it only validates *p or may also adjust it is not
// visible here -- confirm.
383 void check(rnn_desc_t *p);
// Driver entry point taking command-line style arguments; `main_bench`
// defaults to true.
384 int bench(int argc, char **argv, bool main_bench = true);