1 /*******************************************************************************
2 * Copyright 2016-2018 Intel Corporation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
24 using namespace mkldnn;
// Builds a forward-inference network (conv1..conv5 with relu/lrn/max-pooling
// stages, followed by the fc6..fc8 inner products) out of mkldnn primitives
// on a CPU engine, then executes it.
//
// Two primitive lists are assembled:
//   net_weights - reorders of user-layout weights into the layout requested
//                 by each primitive; submitted once.
//   net         - the per-pass primitives; submitted `times` times.
//
// times: number of times `net` is submitted (defaults to 100).
//
// NOTE(review): `batch` is used throughout but its declaration is not visible
// in this section, and several statements below appear without their first
// or last line - confirm against the complete file before changing code.
28 void simple_net(int times = 100) {
30 auto cpu_engine = engine(engine::cpu, 0);
32 /* Create a vector of primitives to hold the network. For efficiency,
33 * weight reorders are stored in a separate net so they run only once. */
34 std::vector<primitive> net;
35 std::vector<primitive> net_weights;
40 * {batch, 3, 227, 227} (x) {96, 3, 11, 11} -> {batch, 96, 55, 55}
// conv1 geometry: source, weights, bias and destination shapes, plus the
// strides and padding passed to the convolution descriptor below.
43 memory::dims conv1_src_tz = { batch, 3, 227, 227 };
44 memory::dims conv1_weights_tz = { 96, 3, 11, 11 };
45 memory::dims conv1_bias_tz = { 96 };
46 memory::dims conv1_dst_tz = { batch, 96, 55, 55 };
47 memory::dims conv1_strides = { 4, 4 };
48 memory::dims conv1_padding = { 0, 0 };
50 /* Allocate input and output buffers for user data */
51 std::vector<float> user_src(batch * 3 * 227 * 227);
52 std::vector<float> user_dst(batch * 1000);
54 /* Allocate buffers for weights and bias (left value-initialized) */
// The element count of each buffer is the product of all entries of its shape.
55 std::vector<float> conv1_weights(std::accumulate(
56 conv1_weights_tz.begin(), conv1_weights_tz.end(), 1,
57 std::multiplies<uint32_t>()));
58 std::vector<float> conv1_bias(std::accumulate(conv1_bias_tz.begin(),
59 conv1_bias_tz.end(), 1, std::multiplies<uint32_t>()));
61 /* create memory for user data */
63 = memory({ { { conv1_src_tz }, memory::data_type::f32,
64 memory::format::nchw },
67 auto user_weights_memory
68 = memory({ { { conv1_weights_tz }, memory::data_type::f32,
69 memory::format::oihw },
71 conv1_weights.data());
72 auto user_bias_memory = memory(
73 { { { conv1_bias_tz }, memory::data_type::f32, memory::format::x },
77 /* create memory descriptors for convolution data w/ no specified format
79 auto conv1_src_md = memory::desc(
80 { conv1_src_tz }, memory::data_type::f32, memory::format::any);
81 auto conv1_bias_md = memory::desc(
82 { conv1_bias_tz }, memory::data_type::f32, memory::format::any);
83 auto conv1_weights_md = memory::desc(
84 { conv1_weights_tz }, memory::data_type::f32, memory::format::any);
85 auto conv1_dst_md = memory::desc(
86 { conv1_dst_tz }, memory::data_type::f32, memory::format::any);
88 /* create a convolution */
89 auto conv1_desc = convolution_forward::desc(
90 prop_kind::forward_inference, convolution_direct, conv1_src_md,
91 conv1_weights_md, conv1_bias_md, conv1_dst_md, conv1_strides,
92 conv1_padding, conv1_padding, padding_kind::zero);
94 = convolution_forward::primitive_desc(conv1_desc, cpu_engine);
96 /* create reorders for data and weights if layout requested by
97 * convolution is different from NCHW/OIHW */
98 auto conv1_src_memory = user_src_memory;
99 if (memory::primitive_desc(conv1_prim_desc.src_primitive_desc())
100 != user_src_memory.get_primitive_desc()) {
101 conv1_src_memory = memory(conv1_prim_desc.src_primitive_desc());
102 net.push_back(reorder(user_src_memory, conv1_src_memory));
// The weights reorder is queued on net_weights, which is submitted only once.
105 auto conv1_weights_memory = user_weights_memory;
106 if (memory::primitive_desc(conv1_prim_desc.weights_primitive_desc())
107 != user_weights_memory.get_primitive_desc()) {
109 = memory(conv1_prim_desc.weights_primitive_desc());
110 net_weights.push_back(
111 reorder(user_weights_memory, conv1_weights_memory));
114 auto conv1_dst_memory = memory(conv1_prim_desc.dst_primitive_desc());
116 /* create convolution primitive and add it to net */
117 net.push_back(convolution_forward(conv1_prim_desc, conv1_src_memory,
118 conv1_weights_memory, user_bias_memory,
122 * {batch, 96, 55, 55} -> {batch, 96, 55, 55}
// NOTE(review): this value is passed as the last argument of the eltwise_relu
// descriptor, presumably the slope applied to negative inputs. A slope of 1.0f
// would pass negative values through unchanged; confirm whether 0.0f (plain
// ReLU) was intended. The same applies to negative2..negative5_slope below.
124 const float negative1_slope = 1.0f;
126 /* create relu primitive and add it to net */
127 auto relu1_desc = eltwise_forward::desc(prop_kind::forward_inference,
128 algorithm::eltwise_relu,
129 conv1_dst_memory.get_primitive_desc().desc(), negative1_slope);
131 = eltwise_forward::primitive_desc(relu1_desc, cpu_engine);
// The same memory is passed as source and destination of the relu.
133 net.push_back(eltwise_forward(
134 relu1_prim_desc, conv1_dst_memory, conv1_dst_memory));
137 * {batch, 96, 55, 55} -> {batch, 96, 55, 55}
142 const uint32_t local1_size = 5;
143 const float alpha1 = 0.0001f;
144 const float beta1 = 0.75f;
145 const float k1 = 1.0f;
147 /* create lrn primitive and add it to net */
148 auto lrn1_desc = lrn_forward::desc(prop_kind::forward_inference,
150 conv1_dst_memory.get_primitive_desc().desc(), local1_size,
153 = lrn_forward::primitive_desc(lrn1_desc, cpu_engine);
154 auto lrn1_dst_memory = memory(lrn1_prim_desc.dst_primitive_desc());
157 lrn_forward(lrn1_prim_desc, conv1_dst_memory, lrn1_dst_memory));
160 * {batch, 96, 55, 55} -> {batch, 96, 27, 27}
165 memory::dims pool1_dst_tz = { batch, 96, 27, 27 };
166 memory::dims pool1_kernel = { 3, 3 };
167 memory::dims pool1_strides = { 2, 2 };
168 memory::dims pool_padding = { 0, 0 };
170 auto pool1_dst_md = memory::desc(
171 { pool1_dst_tz }, memory::data_type::f32, memory::format::any);
173 /* create a pooling */
174 auto pool1_desc = pooling_forward::desc(prop_kind::forward_inference,
175 pooling_max, lrn1_dst_memory.get_primitive_desc().desc(),
176 pool1_dst_md, pool1_strides, pool1_kernel, pool_padding,
177 pool_padding, padding_kind::zero);
178 auto pool1_pd = pooling_forward::primitive_desc(pool1_desc, cpu_engine);
179 auto pool1_dst_memory = memory(pool1_pd.dst_primitive_desc());
181 /* create pooling primitive and add it to net */
183 pooling_forward(pool1_pd, lrn1_dst_memory, pool1_dst_memory));
186 * {batch, 96, 27, 27} (x) {2, 128, 48, 5, 5} -> {batch, 256, 27, 27}
// conv2 uses five-dimensional weights stored in the goihw format.
189 memory::dims conv2_src_tz = { batch, 96, 27, 27 };
190 memory::dims conv2_weights_tz = { 2, 128, 48, 5, 5 };
191 memory::dims conv2_bias_tz = { 256 };
192 memory::dims conv2_dst_tz = { batch, 256, 27, 27 };
193 memory::dims conv2_strides = { 1, 1 };
194 memory::dims conv2_padding = { 2, 2 };
196 std::vector<float> conv2_weights(std::accumulate(
197 conv2_weights_tz.begin(), conv2_weights_tz.end(), 1,
198 std::multiplies<uint32_t>()));
199 std::vector<float> conv2_bias(std::accumulate(conv2_bias_tz.begin(),
200 conv2_bias_tz.end(), 1, std::multiplies<uint32_t>()));
202 /* create memory for user data */
203 auto conv2_user_weights_memory
204 = memory({ { { conv2_weights_tz }, memory::data_type::f32,
205 memory::format::goihw },
207 conv2_weights.data());
208 auto conv2_user_bias_memory
209 = memory({ { { conv2_bias_tz }, memory::data_type::f32,
214 /* create memory descriptors for convolution data w/ no specified format
216 auto conv2_src_md = memory::desc(
217 { conv2_src_tz }, memory::data_type::f32, memory::format::any);
218 auto conv2_bias_md = memory::desc(
219 { conv2_bias_tz }, memory::data_type::f32, memory::format::any);
220 auto conv2_weights_md = memory::desc({ conv2_weights_tz },
221 memory::data_type::f32, memory::format::any);
222 auto conv2_dst_md = memory::desc(
223 { conv2_dst_tz }, memory::data_type::f32, memory::format::any);
225 /* create a convolution */
226 auto conv2_desc = convolution_forward::desc(
227 prop_kind::forward_inference, convolution_direct, conv2_src_md,
228 conv2_weights_md, conv2_bias_md, conv2_dst_md, conv2_strides,
229 conv2_padding, conv2_padding, padding_kind::zero);
231 = convolution_forward::primitive_desc(conv2_desc, cpu_engine);
// conv2 reads the pool1 output; a reorder is queued only when conv2 requests
// a different source primitive descriptor.
233 auto conv2_src_memory = pool1_dst_memory;
234 if (memory::primitive_desc(conv2_prim_desc.src_primitive_desc())
235 != conv2_src_memory.get_primitive_desc()) {
236 conv2_src_memory = memory(conv2_prim_desc.src_primitive_desc());
237 net.push_back(reorder(pool1_dst_memory, conv2_src_memory));
240 auto conv2_weights_memory = conv2_user_weights_memory;
241 if (memory::primitive_desc(conv2_prim_desc.weights_primitive_desc())
242 != conv2_user_weights_memory.get_primitive_desc()) {
244 = memory(conv2_prim_desc.weights_primitive_desc());
245 net_weights.push_back(
246 reorder(conv2_user_weights_memory, conv2_weights_memory));
249 auto conv2_dst_memory = memory(conv2_prim_desc.dst_primitive_desc());
251 /* create convolution primitive and add it to net */
252 net.push_back(convolution_forward(conv2_prim_desc, conv2_src_memory,
253 conv2_weights_memory, conv2_user_bias_memory,
257 * {batch, 256, 27, 27} -> {batch, 256, 27, 27}
// NOTE(review): same slope question as negative1_slope.
259 const float negative2_slope = 1.0f;
261 /* create relu primitive and add it to net */
262 auto relu2_desc = eltwise_forward::desc(prop_kind::forward_inference,
263 algorithm::eltwise_relu,
264 conv2_dst_memory.get_primitive_desc().desc(), negative2_slope);
266 = eltwise_forward::primitive_desc(relu2_desc, cpu_engine);
268 net.push_back(eltwise_forward(
269 relu2_prim_desc, conv2_dst_memory, conv2_dst_memory));
272 * {batch, 256, 27, 27} -> {batch, 256, 27, 27}
277 const uint32_t local2_size = 5;
278 const float alpha2 = 0.0001f;
279 const float beta2 = 0.75f;
280 const float k2 = 1.0f;
282 /* create lrn primitive and add it to net */
283 auto lrn2_desc = lrn_forward::desc(prop_kind::forward_inference,
285 conv2_prim_desc.dst_primitive_desc().desc(), local2_size,
288 = lrn_forward::primitive_desc(lrn2_desc, cpu_engine);
289 auto lrn2_dst_memory = memory(lrn2_prim_desc.dst_primitive_desc());
292 lrn_forward(lrn2_prim_desc, conv2_dst_memory, lrn2_dst_memory));
295 * {batch, 256, 27, 27} -> {batch, 256, 13, 13}
300 memory::dims pool2_dst_tz = { batch, 256, 13, 13 };
301 memory::dims pool2_kernel = { 3, 3 };
302 memory::dims pool2_strides = { 2, 2 };
303 memory::dims pool2_padding = { 0, 0 };
305 auto pool2_dst_md = memory::desc(
306 { pool2_dst_tz }, memory::data_type::f32, memory::format::any);
308 /* create a pooling */
309 auto pool2_desc = pooling_forward::desc(prop_kind::forward_inference,
310 pooling_max, lrn2_dst_memory.get_primitive_desc().desc(),
311 pool2_dst_md, pool2_strides, pool2_kernel, pool2_padding,
312 pool2_padding, padding_kind::zero);
313 auto pool2_pd = pooling_forward::primitive_desc(pool2_desc, cpu_engine);
315 auto pool2_dst_memory = memory(pool2_pd.dst_primitive_desc());
317 /* create pooling primitive and add it to net */
319 pooling_forward(pool2_pd, lrn2_dst_memory, pool2_dst_memory));
323 * {batch, 256, 13, 13} (x) {384, 256, 3, 3}; -> {batch, 384, 13, 13};
326 memory::dims conv3_src_tz = { batch, 256, 13, 13 };
327 memory::dims conv3_weights_tz = { 384, 256, 3, 3 };
328 memory::dims conv3_bias_tz = { 384 };
329 memory::dims conv3_dst_tz = { batch, 384, 13, 13 };
330 memory::dims conv3_strides = { 1, 1 };
331 memory::dims conv3_padding = { 1, 1 };
333 std::vector<float> conv3_weights(std::accumulate(
334 conv3_weights_tz.begin(), conv3_weights_tz.end(), 1,
335 std::multiplies<uint32_t>()));
336 std::vector<float> conv3_bias(std::accumulate(conv3_bias_tz.begin(),
337 conv3_bias_tz.end(), 1, std::multiplies<uint32_t>()));
339 /* create memory for user data */
340 auto conv3_user_weights_memory
341 = memory({ { { conv3_weights_tz }, memory::data_type::f32,
342 memory::format::oihw },
344 conv3_weights.data());
345 auto conv3_user_bias_memory
346 = memory({ { { conv3_bias_tz }, memory::data_type::f32,
351 /* create memory descriptors for convolution data w/ no specified format
353 auto conv3_src_md = memory::desc(
354 { conv3_src_tz }, memory::data_type::f32, memory::format::any);
355 auto conv3_bias_md = memory::desc(
356 { conv3_bias_tz }, memory::data_type::f32, memory::format::any);
357 auto conv3_weights_md = memory::desc({ conv3_weights_tz },
358 memory::data_type::f32, memory::format::any);
359 auto conv3_dst_md = memory::desc(
360 { conv3_dst_tz }, memory::data_type::f32, memory::format::any);
362 /* create a convolution */
363 auto conv3_desc = convolution_forward::desc(
364 prop_kind::forward_inference, convolution_direct, conv3_src_md,
365 conv3_weights_md, conv3_bias_md, conv3_dst_md, conv3_strides,
366 conv3_padding, conv3_padding, padding_kind::zero);
368 = convolution_forward::primitive_desc(conv3_desc, cpu_engine);
// conv3 reads the pool2 output, reordered only if the descriptors differ.
370 auto conv3_src_memory = pool2_dst_memory;
371 if (memory::primitive_desc(conv3_prim_desc.src_primitive_desc())
372 != conv3_src_memory.get_primitive_desc()) {
373 conv3_src_memory = memory(conv3_prim_desc.src_primitive_desc());
374 net.push_back(reorder(pool2_dst_memory, conv3_src_memory));
377 auto conv3_weights_memory = conv3_user_weights_memory;
378 if (memory::primitive_desc(conv3_prim_desc.weights_primitive_desc())
379 != conv3_user_weights_memory.get_primitive_desc()) {
381 = memory(conv3_prim_desc.weights_primitive_desc());
382 net_weights.push_back(
383 reorder(conv3_user_weights_memory, conv3_weights_memory));
386 auto conv3_dst_memory = memory(conv3_prim_desc.dst_primitive_desc());
388 /* create convolution primitive and add it to net */
389 net.push_back(convolution_forward(conv3_prim_desc, conv3_src_memory,
390 conv3_weights_memory, conv3_user_bias_memory,
394 * {batch, 384, 13, 13} -> {batch, 384, 13, 13}
// NOTE(review): same slope question as negative1_slope.
396 const float negative3_slope = 1.0f;
398 /* create relu primitive and add it to net */
399 auto relu3_desc = eltwise_forward::desc(prop_kind::forward_inference,
400 algorithm::eltwise_relu,
401 conv3_dst_memory.get_primitive_desc().desc(), negative3_slope);
403 = eltwise_forward::primitive_desc(relu3_desc, cpu_engine);
405 net.push_back(eltwise_forward(
406 relu3_prim_desc, conv3_dst_memory, conv3_dst_memory));
409 * {batch, 384, 13, 13} (x) {2, 192, 192, 3, 3}; -> {batch, 384, 13,
413 memory::dims conv4_src_tz = { batch, 384, 13, 13 };
414 memory::dims conv4_weights_tz = { 2, 192, 192, 3, 3 };
415 memory::dims conv4_bias_tz = { 384 };
416 memory::dims conv4_dst_tz = { batch, 384, 13, 13 };
417 memory::dims conv4_strides = { 1, 1 };
418 memory::dims conv4_padding = { 1, 1 };
420 std::vector<float> conv4_weights(std::accumulate(
421 conv4_weights_tz.begin(), conv4_weights_tz.end(), 1,
422 std::multiplies<uint32_t>()));
423 std::vector<float> conv4_bias(std::accumulate(conv4_bias_tz.begin(),
424 conv4_bias_tz.end(), 1, std::multiplies<uint32_t>()));
426 /* create memory for user data */
427 auto conv4_user_weights_memory
428 = memory({ { { conv4_weights_tz }, memory::data_type::f32,
429 memory::format::goihw },
431 conv4_weights.data());
432 auto conv4_user_bias_memory
433 = memory({ { { conv4_bias_tz }, memory::data_type::f32,
438 /* create memory descriptors for convolution data w/ no specified format
440 auto conv4_src_md = memory::desc(
441 { conv4_src_tz }, memory::data_type::f32, memory::format::any);
442 auto conv4_bias_md = memory::desc(
443 { conv4_bias_tz }, memory::data_type::f32, memory::format::any);
444 auto conv4_weights_md = memory::desc({ conv4_weights_tz },
445 memory::data_type::f32, memory::format::any);
446 auto conv4_dst_md = memory::desc(
447 { conv4_dst_tz }, memory::data_type::f32, memory::format::any);
449 /* create a convolution */
450 auto conv4_desc = convolution_forward::desc(
451 prop_kind::forward_inference, convolution_direct, conv4_src_md,
452 conv4_weights_md, conv4_bias_md, conv4_dst_md, conv4_strides,
453 conv4_padding, conv4_padding, padding_kind::zero);
455 = convolution_forward::primitive_desc(conv4_desc, cpu_engine);
// conv4 reads conv3_dst_memory directly, i.e. the memory relu3 wrote into.
457 auto conv4_src_memory = conv3_dst_memory;
458 if (memory::primitive_desc(conv4_prim_desc.src_primitive_desc())
459 != conv4_src_memory.get_primitive_desc()) {
460 conv4_src_memory = memory(conv4_prim_desc.src_primitive_desc());
461 net.push_back(reorder(conv3_dst_memory, conv4_src_memory));
464 auto conv4_weights_memory = conv4_user_weights_memory;
465 if (memory::primitive_desc(conv4_prim_desc.weights_primitive_desc())
466 != conv4_user_weights_memory.get_primitive_desc()) {
468 = memory(conv4_prim_desc.weights_primitive_desc());
469 net_weights.push_back(
470 reorder(conv4_user_weights_memory, conv4_weights_memory));
473 auto conv4_dst_memory = memory(conv4_prim_desc.dst_primitive_desc());
475 /* create convolution primitive and add it to net */
476 net.push_back(convolution_forward(conv4_prim_desc, conv4_src_memory,
477 conv4_weights_memory, conv4_user_bias_memory,
481 * {batch, 384, 13, 13} -> {batch, 384, 13, 13}
// NOTE(review): same slope question as negative1_slope.
483 const float negative4_slope = 1.0f;
485 /* create relu primitive and add it to net */
486 auto relu4_desc = eltwise_forward::desc(prop_kind::forward_inference,
487 algorithm::eltwise_relu,
488 conv4_dst_memory.get_primitive_desc().desc(), negative4_slope);
490 = eltwise_forward::primitive_desc(relu4_desc, cpu_engine);
492 net.push_back(eltwise_forward(
493 relu4_prim_desc, conv4_dst_memory, conv4_dst_memory));
496 * {batch, 384, 13, 13} (x) {2, 128, 192, 3, 3}; -> {batch, 256, 13,
500 memory::dims conv5_weights_tz = { 2, 128, 192, 3, 3 };
501 memory::dims conv5_bias_tz = { 256 };
502 memory::dims conv5_dst_tz = { batch, 256, 13, 13 };
503 memory::dims conv5_strides = { 1, 1 };
504 memory::dims conv5_padding = { 1, 1 };
506 std::vector<float> conv5_weights(std::accumulate(
507 conv5_weights_tz.begin(), conv5_weights_tz.end(), 1,
508 std::multiplies<uint32_t>()));
509 std::vector<float> conv5_bias(std::accumulate(conv5_bias_tz.begin(),
510 conv5_bias_tz.end(), 1, std::multiplies<uint32_t>()));
512 /* create memory for user data */
513 auto conv5_user_weights_memory
514 = memory({ { { conv5_weights_tz }, memory::data_type::f32,
515 memory::format::goihw },
517 conv5_weights.data());
518 auto conv5_user_bias_memory
519 = memory({ { { conv5_bias_tz }, memory::data_type::f32,
524 /* create memory descriptors for convolution data w/ no specified format
526 auto conv5_bias_md = memory::desc(
527 { conv5_bias_tz }, memory::data_type::f32, memory::format::any);
528 auto conv5_weights_md = memory::desc({ conv5_weights_tz },
529 memory::data_type::f32, memory::format::any);
530 auto conv5_dst_md = memory::desc(
531 { conv5_dst_tz }, memory::data_type::f32, memory::format::any);
533 /* create a convolution */
// Unlike conv1..conv4, the source descriptor here is taken from the existing
// conv4_dst_memory rather than from a format::any descriptor.
534 auto conv5_desc = convolution_forward::desc(
535 prop_kind::forward_inference, convolution_direct,
536 conv4_dst_memory.get_primitive_desc().desc(), conv5_weights_md,
537 conv5_bias_md, conv5_dst_md, conv5_strides, conv5_padding,
538 conv5_padding, padding_kind::zero);
540 = convolution_forward::primitive_desc(conv5_desc, cpu_engine);
542 auto conv5_src_memory = conv4_dst_memory;
543 if (memory::primitive_desc(conv5_prim_desc.src_primitive_desc())
544 != conv5_src_memory.get_primitive_desc()) {
545 conv5_src_memory = memory(conv5_prim_desc.src_primitive_desc());
546 net.push_back(reorder(conv4_dst_memory, conv5_src_memory));
549 auto conv5_weights_memory = conv5_user_weights_memory;
550 if (memory::primitive_desc(conv5_prim_desc.weights_primitive_desc())
551 != conv5_user_weights_memory.get_primitive_desc()) {
553 = memory(conv5_prim_desc.weights_primitive_desc());
554 net_weights.push_back(
555 reorder(conv5_user_weights_memory, conv5_weights_memory));
558 auto conv5_dst_memory = memory(conv5_prim_desc.dst_primitive_desc());
560 /* create convolution primitive and add it to net */
561 net.push_back(convolution_forward(conv5_prim_desc, conv5_src_memory,
562 conv5_weights_memory, conv5_user_bias_memory,
566 * {batch, 256, 13, 13} -> {batch, 256, 13, 13}
// NOTE(review): same slope question as negative1_slope.
568 const float negative5_slope = 1.0f;
570 /* create relu primitive and add it to net */
571 auto relu5_desc = eltwise_forward::desc(prop_kind::forward_inference,
572 algorithm::eltwise_relu,
573 conv5_dst_memory.get_primitive_desc().desc(), negative5_slope);
575 = eltwise_forward::primitive_desc(relu5_desc, cpu_engine);
577 net.push_back(eltwise_forward(
578 relu5_prim_desc, conv5_dst_memory, conv5_dst_memory));
581 * {batch, 256, 13, 13} -> {batch, 256, 6, 6}
586 memory::dims pool5_dst_tz = { batch, 256, 6, 6 };
587 memory::dims pool5_kernel = { 3, 3 };
588 memory::dims pool5_strides = { 2, 2 };
589 memory::dims pool5_padding = { 0, 0 };
// NOTE(review): pool5_dst is allocated here but not referenced again in the
// visible part of this function (pool5_dst_memory is built from the pooling
// primitive descriptor instead) - possibly unused; confirm.
591 std::vector<float> pool5_dst(std::accumulate(pool5_dst_tz.begin(),
592 pool5_dst_tz.end(), 1, std::multiplies<uint32_t>()));
594 auto pool5_dst_md = memory::desc(
595 { pool5_dst_tz }, memory::data_type::f32, memory::format::any);
597 /* create a pooling */
598 auto pool5_desc = pooling_forward::desc(prop_kind::forward_inference,
599 pooling_max, conv5_dst_memory.get_primitive_desc().desc(),
600 pool5_dst_md, pool5_strides, pool5_kernel, pool5_padding,
601 pool5_padding, padding_kind::zero);
602 auto pool5_pd = pooling_forward::primitive_desc(pool5_desc, cpu_engine);
604 auto pool5_dst_memory = memory(pool5_pd.dst_primitive_desc());
606 /* create pooling primitive and add it to net */
608 pooling_forward(pool5_pd, conv5_dst_memory, pool5_dst_memory));
611 * fc6 inner product {batch, 256, 6, 6} (x) {4096, 256, 6, 6}-> {batch,
614 memory::dims fc6_src_tz = { batch, 256, 6, 6 };
615 memory::dims fc6_weights_tz = { 4096, 256, 6, 6 };
616 memory::dims fc6_bias_tz = { 4096 };
617 memory::dims fc6_dst_tz = { batch, 4096 };
619 std::vector<float> fc6_weights(std::accumulate(fc6_weights_tz.begin(),
620 fc6_weights_tz.end(), 1, std::multiplies<uint32_t>()));
621 std::vector<float> fc6_bias(std::accumulate(fc6_bias_tz.begin(),
622 fc6_bias_tz.end(), 1, std::multiplies<uint32_t>()));
624 /* create memory for user data */
625 auto fc6_user_weights_memory
626 = memory({ { { fc6_weights_tz }, memory::data_type::f32,
627 memory::format::oihw },
631 auto fc6_user_bias_memory
632 = memory({ { { fc6_bias_tz }, memory::data_type::f32,
637 /* create memory descriptors for inner product data w/ no specified format
639 auto fc6_src_md = memory::desc(
640 { fc6_src_tz }, memory::data_type::f32, memory::format::any);
641 auto fc6_bias_md = memory::desc(
642 { fc6_bias_tz }, memory::data_type::f32, memory::format::any);
643 auto fc6_weights_md = memory::desc({ fc6_weights_tz },
644 memory::data_type::f32, memory::format::any);
645 auto fc6_dst_md = memory::desc(
646 { fc6_dst_tz }, memory::data_type::f32, memory::format::any);
648 /* create an inner product */
650 = inner_product_forward::desc(prop_kind::forward_inference,
651 fc6_src_md, fc6_weights_md, fc6_bias_md, fc6_dst_md);
653 = inner_product_forward::primitive_desc(fc6_desc, cpu_engine);
// fc6 reads the pool5 output, reordered only if the descriptors differ.
655 auto fc6_src_memory = pool5_dst_memory;
656 if (memory::primitive_desc(fc6_prim_desc.src_primitive_desc())
657 != fc6_src_memory.get_primitive_desc()) {
658 fc6_src_memory = memory(fc6_prim_desc.src_primitive_desc());
659 net.push_back(reorder(pool5_dst_memory, fc6_src_memory));
662 auto fc6_weights_memory = fc6_user_weights_memory;
663 if (memory::primitive_desc(fc6_prim_desc.weights_primitive_desc())
664 != fc6_user_weights_memory.get_primitive_desc()) {
665 fc6_weights_memory = memory(fc6_prim_desc.weights_primitive_desc());
666 net_weights.push_back(
667 reorder(fc6_user_weights_memory, fc6_weights_memory));
670 auto fc6_dst_memory = memory(fc6_prim_desc.dst_primitive_desc());
672 /* create inner product primitive and add it to net */
673 net.push_back(inner_product_forward(fc6_prim_desc, fc6_src_memory,
674 fc6_weights_memory, fc6_user_bias_memory, fc6_dst_memory));
677 * fc7 inner product {batch, 4096} (x) {4096, 4096}-> {batch, 4096}
679 memory::dims fc7_weights_tz = { 4096, 4096 };
680 memory::dims fc7_bias_tz = { 4096 };
681 memory::dims fc7_dst_tz = { batch, 4096 };
683 std::vector<float> fc7_weights(std::accumulate(fc7_weights_tz.begin(),
684 fc7_weights_tz.end(), 1, std::multiplies<uint32_t>()));
685 std::vector<float> fc7_bias(std::accumulate(fc7_bias_tz.begin(),
686 fc7_bias_tz.end(), 1, std::multiplies<uint32_t>()));
688 /* create memory for user data */
689 auto fc7_user_weights_memory
690 = memory({ { { fc7_weights_tz }, memory::data_type::f32,
691 memory::format::nc },
695 auto fc7_user_bias_memory
696 = memory({ { { fc7_bias_tz }, memory::data_type::f32,
701 /* create memory descriptors for inner product data w/ no specified format
703 auto fc7_bias_md = memory::desc(
704 { fc7_bias_tz }, memory::data_type::f32, memory::format::any);
705 auto fc7_weights_md = memory::desc({ fc7_weights_tz },
706 memory::data_type::f32, memory::format::any);
707 auto fc7_dst_md = memory::desc(
708 { fc7_dst_tz }, memory::data_type::f32, memory::format::any);
710 /* create an inner product */
712 = inner_product_forward::desc(prop_kind::forward_inference,
713 fc6_dst_memory.get_primitive_desc().desc(),
714 fc7_weights_md, fc7_bias_md, fc7_dst_md);
716 = inner_product_forward::primitive_desc(fc7_desc, cpu_engine);
// NOTE(review): unlike every other weights reorder in this function, this one
// is queued on `net` (submitted on every pass) instead of `net_weights`
// (submitted once). That looks unintentional; confirm and move if so.
718 auto fc7_weights_memory = fc7_user_weights_memory;
719 if (memory::primitive_desc(fc7_prim_desc.weights_primitive_desc())
720 != fc7_user_weights_memory.get_primitive_desc()) {
721 fc7_weights_memory = memory(fc7_prim_desc.weights_primitive_desc());
722 net.push_back(reorder(fc7_user_weights_memory, fc7_weights_memory));
725 auto fc7_dst_memory = memory(fc7_prim_desc.dst_primitive_desc());
727 /* create inner product primitive and add it to net */
728 net.push_back(inner_product_forward(fc7_prim_desc, fc6_dst_memory,
729 fc7_weights_memory, fc7_user_bias_memory, fc7_dst_memory));
732 * fc8 inner product {batch, 4096} (x) {1000, 4096}-> {batch, 1000}
734 memory::dims fc8_weights_tz = { 1000, 4096 };
735 memory::dims fc8_bias_tz = { 1000 };
736 memory::dims fc8_dst_tz = { batch, 1000 };
738 std::vector<float> fc8_weights(std::accumulate(fc8_weights_tz.begin(),
739 fc8_weights_tz.end(), 1, std::multiplies<uint32_t>()));
740 std::vector<float> fc8_bias(std::accumulate(fc8_bias_tz.begin(),
741 fc8_bias_tz.end(), 1, std::multiplies<uint32_t>()));
743 /* create memory for user data */
744 auto fc8_user_weights_memory
745 = memory({ { { fc8_weights_tz }, memory::data_type::f32,
746 memory::format::nc },
750 auto fc8_user_bias_memory
751 = memory({ { { fc8_bias_tz }, memory::data_type::f32,
756 auto user_dst_memory = memory({ { { fc8_dst_tz }, memory::data_type::f32,
757 memory::format::nc },
761 /* create memory descriptors for inner product data w/ no specified format
763 auto fc8_bias_md = memory::desc(
764 { fc8_bias_tz }, memory::data_type::f32, memory::format::any);
765 auto fc8_weights_md = memory::desc({ fc8_weights_tz },
766 memory::data_type::f32, memory::format::any);
767 auto fc8_dst_md = memory::desc(
768 { fc8_dst_tz }, memory::data_type::f32, memory::format::any);
770 /* create an inner product */
772 = inner_product_forward::desc(prop_kind::forward_inference,
773 fc7_dst_memory.get_primitive_desc().desc(),
774 fc8_weights_md, fc8_bias_md, fc8_dst_md);
776 = inner_product_forward::primitive_desc(fc8_desc, cpu_engine);
778 auto fc8_weights_memory = fc8_user_weights_memory;
779 if (memory::primitive_desc(fc8_prim_desc.weights_primitive_desc())
780 != fc8_user_weights_memory.get_primitive_desc()) {
781 fc8_weights_memory = memory(fc8_prim_desc.weights_primitive_desc());
782 net_weights.push_back(
783 reorder(fc8_user_weights_memory, fc8_weights_memory));
786 auto fc8_dst_memory = memory(fc8_prim_desc.dst_primitive_desc());
788 /* create inner product primitive and add it to net */
789 net.push_back(inner_product_forward(fc8_prim_desc, fc7_dst_memory,
790 fc8_weights_memory, fc8_user_bias_memory, fc8_dst_memory));
792 /* create reorder between internal and user data if it is needed and
793 * add it to net after the fc8 inner product */
// NOTE(review): this compares the two memory objects themselves, whereas the
// earlier layout checks compare primitive descriptors; confirm this is the
// intended test for "layouts differ".
794 if (fc8_dst_memory != user_dst_memory) {
795 net.push_back(reorder(fc8_dst_memory, user_dst_memory));
// Submit the weight reorders once, then submit the network `times` times;
// every submission uses a new eager stream and is waited on.
798 stream(stream::kind::eager).submit(net_weights).wait();
799 for (int j = 0; j < times; ++j) {
800 stream(stream::kind::eager).submit(net).wait();
// Program entry point. Takes two steady_clock readings converted to
// milliseconds (`begin`, `end`), prints their difference divided by `times`
// after the label "Use time", and writes the status and message of `e` to
// std::cerr.
//
// NOTE(review): the statements between the two readings, the declaration of
// `times`, and the handler that introduces `e` are not visible in this
// section; presumably simple_net is invoked between `begin` and `end` -
// confirm against the complete file.
804 int main(int argc, char **argv) {
806 auto begin = chrono::duration_cast<chrono::milliseconds>(
807 chrono::steady_clock::now().time_since_epoch())
811 auto end = chrono::duration_cast<chrono::milliseconds>(
812 chrono::steady_clock::now().time_since_epoch())
// Adding 0.0 to `times` makes the division a floating-point one.
814 cout << "Use time " << (end - begin) / (times + 0.0) << "\n";
// Report the failure details on the error stream.
816 std::cerr << "status: " << e.status << std::endl;
817 std::cerr << "message: " << e.message << std::endl;