arm_compute v17.06
examples/neon_cnn.cpp
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/NEFunctions.h"

#include "arm_compute/core/Types.h"
#include "utils/Utils.h"

using namespace arm_compute;
using namespace utils;

void main_cnn(int argc, const char **argv)
{
    ARM_COMPUTE_UNUSED(argc);
    ARM_COMPUTE_UNUSED(argv);

    // The src tensor should contain the input image
    Tensor src;

    // The weights and biases tensors should be initialized with the values obtained from training
    Tensor weights0;
    Tensor weights1;
    Tensor weights2;
    Tensor biases0;
    Tensor biases1;
    Tensor biases2;

    Tensor out_conv0;
    Tensor out_conv1;
    Tensor out_act0;
    Tensor out_act1;
    Tensor out_act2;
    Tensor out_pool0;
    Tensor out_pool1;
    Tensor out_fc0;
    Tensor out_softmax;

    NEConvolutionLayer    conv0;
    NEConvolutionLayer    conv1;
    NEPoolingLayer        pool0;
    NEPoolingLayer        pool1;
    NEFullyConnectedLayer fc0;
    NEActivationLayer     act0;
    NEActivationLayer     act1;
    NEActivationLayer     act2;
    NESoftmaxLayer        softmax;
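
    // Note on the function lifecycle used throughout this example: each NE* function is first
    // configure()d with the tensors it operates on (which also fixes any padding the kernels
    // require), the tensors are then allocate()d, and finally run() executes the NEON kernels
    // on the CPU. Tensors should not be allocated until every function that uses them has been
    // configured.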

    /* [Initialize tensors] */

    // Initialize src tensor
    constexpr unsigned int width_src_image  = 32;
    constexpr unsigned int height_src_image = 32;
    constexpr unsigned int ifm_src_img      = 1;

    const TensorShape src_shape(width_src_image, height_src_image, ifm_src_img);
    src.allocator()->init(TensorInfo(src_shape, 1, DataType::F32));
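    // TensorShape lists dimensions as (width, height, channels/IFM, ...), so src is a 32x32
    // single-channel image. The second TensorInfo argument is the number of channels per
    // element (1 here) and the third is the element data type (32-bit float).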

    // Initialize tensors of conv0
    constexpr unsigned int kernel_x_conv0 = 5;
    constexpr unsigned int kernel_y_conv0 = 5;
    constexpr unsigned int ofm_conv0      = 8;

    const TensorShape weights_shape_conv0(kernel_x_conv0, kernel_y_conv0, src_shape.z(), ofm_conv0);
    const TensorShape biases_shape_conv0(weights_shape_conv0[3]);
    const TensorShape out_shape_conv0(src_shape.x(), src_shape.y(), weights_shape_conv0[3]);

    weights0.allocator()->init(TensorInfo(weights_shape_conv0, 1, DataType::F32));
    biases0.allocator()->init(TensorInfo(biases_shape_conv0, 1, DataType::F32));
    out_conv0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));
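    // The output keeps the 32x32 spatial size because conv0 is configured below with a
    // 5x5 kernel, stride 1 and padding 2: (32 + 2*2 - 5) / 1 + 1 = 32.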

    // Initialize tensor of act0
    out_act0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));

    // Initialize tensor of pool0
    TensorShape out_shape_pool0 = out_shape_conv0;
    out_shape_pool0.set(0, out_shape_pool0.x() / 2);
    out_shape_pool0.set(1, out_shape_pool0.y() / 2);
    out_pool0.allocator()->init(TensorInfo(out_shape_pool0, 1, DataType::F32));
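    // 2x2 max pooling with stride 2 (configured below) halves each spatial dimension:
    // (32 - 2) / 2 + 1 = 16, giving a 16x16x8 output.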

    // Initialize tensors of conv1
    constexpr unsigned int kernel_x_conv1 = 3;
    constexpr unsigned int kernel_y_conv1 = 3;
    constexpr unsigned int ofm_conv1      = 16;

    const TensorShape weights_shape_conv1(kernel_x_conv1, kernel_y_conv1, out_shape_pool0.z(), ofm_conv1);

    const TensorShape biases_shape_conv1(weights_shape_conv1[3]);
    const TensorShape out_shape_conv1(out_shape_pool0.x(), out_shape_pool0.y(), weights_shape_conv1[3]);

    weights1.allocator()->init(TensorInfo(weights_shape_conv1, 1, DataType::F32));
    biases1.allocator()->init(TensorInfo(biases_shape_conv1, 1, DataType::F32));
    out_conv1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32));
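    // Again the spatial size is preserved: conv1 uses a 3x3 kernel, stride 1 and padding 1,
    // so (16 + 2*1 - 3) / 1 + 1 = 16, giving a 16x16x16 output.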

    // Initialize tensor of act1
    out_act1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32));

    // Initialize tensor of pool1
    TensorShape out_shape_pool1 = out_shape_conv1;
    out_shape_pool1.set(0, out_shape_pool1.x() / 2);
    out_shape_pool1.set(1, out_shape_pool1.y() / 2);
    out_pool1.allocator()->init(TensorInfo(out_shape_pool1, 1, DataType::F32));
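    // The second 2x2 pooling stage (average pooling, stride 2) halves the size again:
    // (16 - 2) / 2 + 1 = 8, giving an 8x8x16 output.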

    // Initialize tensor of fc0
    constexpr unsigned int num_labels = 128;

    const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), num_labels);
    const TensorShape biases_shape_fc0(num_labels);
    const TensorShape out_shape_fc0(num_labels);

    weights2.allocator()->init(TensorInfo(weights_shape_fc0, 1, DataType::F32));
    biases2.allocator()->init(TensorInfo(biases_shape_fc0, 1, DataType::F32));
    out_fc0.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32));
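    // The fully connected layer flattens the 8x8x16 = 1024 pooled activations into a single
    // vector, so its weight matrix is 1024x128 and it produces one value per label.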

    // Initialize tensor of act2
    out_act2.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32));

    // Initialize tensor of softmax
    const TensorShape out_shape_softmax(out_shape_fc0.x());
    out_softmax.allocator()->init(TensorInfo(out_shape_softmax, 1, DataType::F32));
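    // The softmax output has the same 128-element shape as its input; it normalizes the
    // 128 scores into a probability distribution over the labels.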

    /* -----------------------End: [Initialize tensors] */

    /* [Configure functions] */

    // in:32x32x1: 5x5 convolution, 8 output feature maps (OFM)
    conv0.configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */));

    // in:32x32x8, out:32x32x8, Activation function: relu
    act0.configure(&out_conv0, &out_act0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // in:32x32x8, out:16x16x8 (2x2 pooling), Pool type function: Max
    pool0.configure(&out_act0, &out_pool0, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */)));

    // in:16x16x8: 3x3 convolution, 16 output feature maps (OFM)
    conv1.configure(&out_pool0, &weights1, &biases1, &out_conv1, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */));

    // in:16x16x16, out:16x16x16, Activation function: relu
    act1.configure(&out_conv1, &out_act1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // in:16x16x16, out:8x8x16 (2x2 pooling), Pool type function: Average
    pool1.configure(&out_act1, &out_pool1, PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */)));

    // in:8x8x16, out:128
    fc0.configure(&out_pool1, &weights2, &biases2, &out_fc0);

    // in:128, out:128, Activation function: relu
    act2.configure(&out_fc0, &out_act2, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // in:128, out:128
    softmax.configure(&out_act2, &out_softmax);

    /* -----------------------End: [Configure functions] */

    /* [Allocate tensors] */

    // Now that the padding requirements are known we can allocate the images:
    src.allocator()->allocate();
    weights0.allocator()->allocate();
    weights1.allocator()->allocate();
    weights2.allocator()->allocate();
    biases0.allocator()->allocate();
    biases1.allocator()->allocate();
    biases2.allocator()->allocate();
    out_conv0.allocator()->allocate();
    out_conv1.allocator()->allocate();
    out_act0.allocator()->allocate();
    out_act1.allocator()->allocate();
    out_act2.allocator()->allocate();
    out_pool0.allocator()->allocate();
    out_pool1.allocator()->allocate();
    out_fc0.allocator()->allocate();
    out_softmax.allocator()->allocate();

    /* -----------------------End: [Allocate tensors] */

    /* [Initialize weights and biases tensors] */

    // Once the tensors have been allocated, the src, weights and biases tensors can be initialized
    // ...
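    //
    // As a minimal, hypothetical sketch (not part of the original example), the values could be
    // written element by element through the core Window/Iterator helpers, assuming a
    // user-provided load_value() helper that returns the trained value for a given coordinate
    // (a placeholder name, not an arm_compute API), and likely an extra
    // #include "arm_compute/core/Helpers.h":
    //
    //   Window window;
    //   window.use_tensor_dimensions(weights0.info());
    //   Iterator it(&weights0, window);
    //   execute_window_loop(window, [&](const Coordinates &id)
    //   {
    //       *reinterpret_cast<float *>(it.ptr()) = load_value(id);
    //   },
    //   it);
    //
    // In a real application the src image would typically be loaded from a file instead
    // (see for example the PPM loading in the neon_convolution example).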

    /* -----------------------End: [Initialize weights and biases tensors] */

    /* [Execute the functions] */

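    // Each run() call dispatches the corresponding NEON kernels on the CPU. The functions are
    // run in network order so that every stage reads the output tensor written by the previous one.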
    conv0.run();
    act0.run();
    pool0.run();
    conv1.run();
    act1.run();
    pool1.run();
    fc0.run();
    act2.run();
    softmax.run();

    /* -----------------------End: [Execute the functions] */
}

/** Main program for cnn test
 *
 * The example implements the following CNN architecture:
 *
 * Input -> conv0:5x5 -> act0:relu -> pool:2x2 -> conv1:3x3 -> act1:relu -> pool:2x2 -> fc0 -> act2:relu -> softmax
 *
 * @param[in] argc Number of arguments
 * @param[in] argv Arguments
 */
int main(int argc, const char **argv)
{
    return utils::run_example(argc, argv, main_cnn);
}