/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <assert.h>
#include <stddef.h>

#include "dnn_types.hpp"
#include "mkldnn_common.hpp"
#include "mkldnn_debug.hpp"
#include "mkldnn_memory.hpp"

enum alg_t { VANILLA_RNN, VANILLA_LSTM, VANILLA_GRU, GRU_LINEAR_BEFORE_RESET };
alg_t str2alg(const char *str);
const char *alg2str(alg_t alg);
mkldnn_alg_kind_t alg2kind(alg_t alg);

enum activation_t { RELU, LOGISTIC, TANH };
activation_t str2activation(const char *str);
const char *activation2str(activation_t alg);
mkldnn_alg_kind_t activation2kind(activation_t alg);
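
// Usage sketch for the converters above (illustrative only; `alg_str` is a
// placeholder and the accepted spellings are whatever str2alg/str2activation
// implement in the .cpp):
//
//     alg_t a = str2alg(alg_str);        // parse a command-line token
//     const char *s = alg2str(a);        // print-friendly name
//     mkldnn_alg_kind_t k = alg2kind(a); // map to the library's enum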

template <typename Telem>
struct array_offset_calculator {
    template <typename... Targs>
    array_offset_calculator(Telem *base, Targs... Fargs)
        : _size(sizeof...(Fargs)) {
        const int init_list[] = { Fargs... };
        _dims = new int[_size];
        for (int i = 0; i < _size; ++i)
            _dims[i] = init_list[i];
        _base_ptr = base;
    }

    ~array_offset_calculator() { delete[] _dims; }

    template <typename... Targs>
    inline Telem &operator()(Targs... Fargs) {
        return *(_base_ptr + _offset(1, Fargs...));
    }

private:
    /* recursion base case: the innermost index needs no further scaling */
    template <typename... Targs>
    inline int _offset(int const dimension, int element) {
        return element;
    }

    template <typename... Targs>
    inline int _offset(int const dimension, int theta, int element) {
        return element + (_dims[dimension] * theta);
    }

    template <typename... Targs>
    inline int _offset(
            int const dimension, int theta, int element, Targs... Fargs) {
        int t_prime = element + (_dims[dimension] * theta);
        return _offset(dimension + 1, t_prime, Fargs...);
    }

    Telem *_base_ptr;
    int _size;
    int *_dims;
};
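
// Usage sketch (the buffer and shape below are made-up examples): wrap a flat
// buffer and index it as a row-major multi-dimensional array, so
// ws(i0, i1, i2) addresses base[(i0 * dims[1] + i1) * dims[2] + i2].
//
//     float buf[2 * 3 * 4] = {0};
//     array_offset_calculator<float> ws(buf, 2, 3, 4);
//     ws(1, 2, 3) = 42.f; // same element as buf[(1 * 3 + 2) * 4 + 3]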

struct rnn_desc_t {
    int sic; // state (iteration) channels
    int slc; // source layer (input) channels
    int mb; // mini-batch size
    int n_layer;
    int n_iter;
    alg_t alg;
    activation_t activation;
    mkldnn_rnn_direction_t direction;
};

enum rnn_data_kind_t {
    input,
    states,
    weights_input,
    weights_states,
    bias,
    dst_last_layer,
    dst_last_iteration,
    dst_diff_input,
    dst_diff_states,
    dst_diff_weights_input,
    dst_diff_weights_states,
    dst_diff_bias,
    diff_last_layer,
    diff_last_iteration,
    data_kind_total // should be last to provide the total number of data kinds
};

inline const char *rnn_data_kind2str(rnn_data_kind_t kind) {
    switch (kind) {
    case input: return "INPUT";
    case states: return "STATES";
    case weights_input: return "WEIGHTS_INPUT";
    case weights_states: return "WEIGHTS_STATES";
    case bias: return "BIAS";
    case dst_last_layer: return "DST_LAST_LAYER";
    case dst_last_iteration: return "DST_LAST_ITERATION";
    default:
        assert(!"incorrect rnn data kind");
        return "incorrect rnn data kind";
    }
}

/** configuration structure that controls initial data filling + error check
 *
 * dt defines precision
 *
 * for each data kind the values are filled as follows:
 * if (rand() > f_sparsity) then:
 *     v <-- f_base
 * else:
 *     v <-- f_min + rand() * f_step % (f_max - f_min)
 *
 * on final check the resulting values should be in [min .. max] range, the
 * relative difference should not exceed eps
 */

typedef struct dt_conf_t {
    mkldnn_data_type_t dt;
    int min, max; /* representative */
    int f_min, f_max; /* fill range */
    int f_base; /* fill base, use 0 */
    int f_step; /* fill step, use 1 */
    double f_sparsity; /* amount of non-zeros, default 0.25 */
    double eps; /* acceptable error */
} _dt_conf_t[data_kind_total];

extern const _dt_conf_t conf_f32;
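
// Fill-logic sketch that mirrors the rule documented above dt_conf_t. This is
// illustrative only: `data`, `nelems`, and the use of rand() are placeholders,
// not the harness's actual filling routine.
//
//     const dt_conf_t &c = p->cfg_[input];
//     for (size_t i = 0; i < nelems; ++i) {
//         bool keep_base = (double)rand() / RAND_MAX > c.f_sparsity;
//         data[i] = keep_base
//                 ? (float)c.f_base
//                 : (float)(c.f_min + rand() * c.f_step % (c.f_max - c.f_min));
//     }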

struct rnn_prb_t : public rnn_desc_t {
    rnn_prb_t(const rnn_desc_t desc, const dt_conf_t *cfg,
            mkldnn_prop_kind_t prop)
        : rnn_desc_t(desc), cfg_(cfg), prop_(prop) {
        switch (alg) {
        case VANILLA_LSTM: n_weights = 1; n_states = 2; n_gates = 4; break;
        case VANILLA_GRU:
        case GRU_LINEAR_BEFORE_RESET:
            n_weights = 1; n_states = 1; n_gates = 3; break;
        default: /* VANILLA_RNN */
            n_weights = 1; n_states = 1; n_gates = 1; break;
        }

        // TODO: recheck below condition
        if (direction == mkldnn_bidirectional_concat
                || direction == mkldnn_bidirectional_sum)
            n_direction = 2;
        else
            n_direction = 1;
    }

    const dt_conf_t *cfg_;
    mkldnn_prop_kind_t prop_;
    int n_direction; // 1 for unidirectional, 2 for bidirectional
    int n_weights, n_states, n_gates;

private:
    rnn_prb_t(const rnn_prb_t &) = delete;
    rnn_prb_t &operator=(const rnn_prb_t &) = delete;
};
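
// Construction sketch (the descriptor values below are arbitrary examples, not
// harness defaults): an LSTM problem ends up with n_gates = 4, n_states = 2.
//
//     rnn_desc_t d = {};
//     d.sic = d.slc = 16; d.mb = 2; d.n_layer = 1; d.n_iter = 3;
//     d.alg = VANILLA_LSTM;
//     d.activation = TANH;
//     d.direction = mkldnn_unidirectional_left2right;
//     rnn_prb_t p(d, conf_f32, mkldnn_forward);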

const size_t max_prb_len = 392;
void prb2str(const rnn_prb_t *p, const res_t *res, char *buffer);

void compute_ref_fwd(const rnn_prb_t *p, dnn_mem_t &input_m,
        dnn_mem_t &states_m, dnn_mem_t &weights_input_m,
        dnn_mem_t &weights_states_m, dnn_mem_t &bias_m,
        dnn_mem_t &dst_last_layer_m, dnn_mem_t &dst_last_iteration_m,
        mkldnn_rnn_direction_t direction);

void compute_ref_bwd(const rnn_prb_t *p, dnn_mem_t &input_m,
        dnn_mem_t &states_m, dnn_mem_t &diff_last_layer_m,
        dnn_mem_t &diff_last_iteration_m, dnn_mem_t &weights_input_m,
        dnn_mem_t &weights_states_m, dnn_mem_t &bias_m,
        dnn_mem_t &dst_last_layer_m, dnn_mem_t &dst_last_iteration_m,
        dnn_mem_t &dst_diff_input_m, dnn_mem_t &dst_diff_states_m,
        dnn_mem_t &dst_diff_weights_input_m,
        dnn_mem_t &dst_diff_weights_states_m, dnn_mem_t &dst_diff_bias_m,
        mkldnn_rnn_direction_t direction);

// mkldnn_ntc
inline size_t ntc_off_f(const rnn_prb_t *p, int n, int t, int c) {
    return ((size_t)n * p->n_iter + t) * p->slc + c;
}

inline void inv_ntc_off_f(
        const rnn_prb_t *p, size_t off, int &n, int &t, int &c) {
    c = off % p->slc;
    off /= p->slc;
    t = off % p->n_iter;
    off /= p->n_iter;
    n = off % p->mb;
    off /= p->mb;
    assert(off == 0);
}

// mkldnn_ldsnc
inline size_t ldsnc_off_f(
        const rnn_prb_t *p, int l, int d, int s, int n, int c) {
    return ((((size_t)l * p->n_direction + d) * p->n_states + s) * p->mb + n)
                    * p->sic
            + c;
}

inline void inv_ldsnc_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
        int &s, int &n, int &c) {
    c = off % p->sic;
    off /= p->sic;
    n = off % p->mb;
    off /= p->mb;
    s = off % p->n_states;
    off /= p->n_states;
    d = off % p->n_direction;
    off /= p->n_direction;
    l = off % p->n_layer;
    off /= p->n_layer;
    assert(off == 0);
}

// mkldnn_ldigo
inline size_t ldigo_off_f(
        const rnn_prb_t *p, int l, int d, int w, int ic, int oc) {
    return ((((size_t)l * p->n_direction + d) * p->n_weights + w) * (4 * p->slc)
                   + ic) * (4 * p->sic)
            + oc;
}

inline void inv_ldigo_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
        int &w, int &ic, int &oc) {
    oc = off % (4 * p->sic);
    off /= (4 * p->sic);
    ic = off % (4 * p->slc);
    off /= (4 * p->slc);
    w = off % p->n_weights;
    off /= p->n_weights;
    d = off % p->n_direction;
    off /= p->n_direction;
    l = off % p->n_layer;
    off /= p->n_layer;
    assert(off == 0);
}

inline size_t ldwOcIc_off_f(
        const rnn_prb_t *p, int l, int d, int w, int oc, int ic) {
    return ((((size_t)l * p->n_direction + d) * p->n_weights + w) * (4 * p->sic)
                   + oc) * p->slc
            + ic;
}

inline void inv_ldwOcIc_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
        int &w, int &oc, int &ic) {
    ic = off % p->slc;
    off /= p->slc;
    oc = off % (4 * p->sic);
    off /= (4 * p->sic);
    w = off % p->n_weights;
    off /= p->n_weights;
    d = off % p->n_direction;
    off /= p->n_direction;
    l = off % p->n_layer;
    off /= p->n_layer;
    assert(off == 0);
}

// bias: mkldnn_ldgo
inline size_t ldgo_off_f(const rnn_prb_t *p, int l, int d, int b, int c) {
    return (((size_t)l * p->n_direction + d) * p->n_gates + b) * p->sic + c;
}

inline void inv_ldgo_off_f(
        const rnn_prb_t *p, size_t off, int &l, int &d, int &b, int &c) {
    c = off % p->sic;
    off /= p->sic;
    b = off % p->n_gates;
    off /= p->n_gates;
    d = off % p->n_direction;
    off /= p->n_direction;
    l = off % p->n_layer;
    off /= p->n_layer;
    assert(off == 0);
}

// dst_last_layer: mkldnn_tnc
inline size_t tnc_off_f(const rnn_prb_t *p, int s, int t, int n, int c) {
    return (((size_t)s * p->n_iter + t) * p->mb + n) * p->sic + c;
}

inline void inv_tnc_off_f(
        const rnn_prb_t *p, size_t off, int &s, int &t, int &n, int &c) {
    c = off % p->sic;
    off /= p->sic;
    n = off % p->mb;
    off /= p->mb;
    t = off % p->n_iter;
    off /= p->n_iter;
    s = off % p->n_states;
    off /= p->n_states;
    assert(off == 0);
}
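
// Round-trip sketch for the offset helpers above (illustrative; assumes `p`
// points to a valid rnn_prb_t and the coordinates are within its bounds):
// every *_off_f maps logical coordinates to a flat index, and the matching
// inv_*_off_f recovers them.
//
//     int n = 1, t = 2, c = 3, n_, t_, c_;
//     size_t off = ntc_off_f(p, n, t, c);
//     inv_ntc_off_f(p, off, n_, t_, c_);
//     assert(n_ == n && t_ == t && c_ == c);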

void perf_report(const rnn_prb_t *p, const res_t *r, const char *pstr);

int doit(const rnn_prb_t *p, res_t *res);
void check(const rnn_prb_t *p);
int bench(int argc, char **argv);