examples/neon_cnn.cpp

   1 /*
   2  * Copyright (c) 2016, 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include "arm_compute/runtime/NEON/NEFunctions.h"
  25
  26 #include "arm_compute/core/Types.h"
  27 #include "arm_compute/runtime/Allocator.h"
  28 #include "arm_compute/runtime/BlobLifetimeManager.h"
  29 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
  30 #include "arm_compute/runtime/PoolManager.h"
  31 #include "utils/Utils.h"
  32
  33 using namespace arm_compute;
  34 using namespace utils;
  35
  36 void main_cnn(int argc, const char **argv)
  37 {
  38     ARM_COMPUTE_UNUSED(argc); // The command-line arguments are not used by this example
  39     ARM_COMPUTE_UNUSED(argv);
  40     // Overview: create the memory managers, describe and configure the network, allocate its tensors, then run every layer once in order
  41     // Create NEON allocator (handed to both memory managers through set_allocator() further down)
  42     Allocator allocator;
  43
  44     // Create memory manager components
  45     // We need 2 memory managers: 1 for handling the tensors within the functions (mm_layers) and 1 for handling the input and output tensors of the functions (mm_transitions)
  46     auto lifetime_mgr0  = std::make_shared<BlobLifetimeManager>();                           // Lifetime manager used by mm_layers
  47     auto lifetime_mgr1  = std::make_shared<BlobLifetimeManager>();                           // Lifetime manager used by mm_transitions
  48     auto pool_mgr0      = std::make_shared<PoolManager>();                                   // Pool manager used by mm_layers
  49     auto pool_mgr1      = std::make_shared<PoolManager>();                                   // Pool manager used by mm_transitions
  50     auto mm_layers      = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Memory manager passed to the layers that accept one
  51     auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Memory manager backing the two memory groups below
  52
  53     // The src tensor should contain the input image
  54     Tensor src;
  55
  56     // The weights and biases tensors should be initialized with the values obtained from training
  57     Tensor weights0;
  58     Tensor weights1;
  59     Tensor weights2;
  60     Tensor biases0;
  61     Tensor biases1;
  62     Tensor biases2;
  63     // Output tensor of each layer; these are the tensors registered with the memory groups further down
  64     Tensor out_conv0;
  65     Tensor out_conv1;
  66     Tensor out_act0;
  67     Tensor out_act1;
  68     Tensor out_act2;
  69     Tensor out_pool0;
  70     Tensor out_pool1;
  71     Tensor out_fc0;
  72     Tensor out_softmax;
  73
  74     // Create layers. The convolution, fully connected and softmax layers are given mm_layers to manage their internal memory requirements; the pooling and activation layers are created without a memory manager
  75     NEConvolutionLayer    conv0(mm_layers);
  76     NEConvolutionLayer    conv1(mm_layers);
  77     NEPoolingLayer        pool0;
  78     NEPoolingLayer        pool1;
  79     NEFullyConnectedLayer fc0(mm_layers);
  80     NEActivationLayer     act0;
  81     NEActivationLayer     act1;
  82     NEActivationLayer     act2;
  83     NESoftmaxLayer        softmax(mm_layers);
  84
  85     /* [Initialize tensors] */
  86
  87     // Initialize src tensor (32x32x1, F32)
  88     constexpr unsigned int width_src_image  = 32;
  89     constexpr unsigned int height_src_image = 32;
  90     constexpr unsigned int ifm_src_img      = 1;
  91
  92     const TensorShape src_shape(width_src_image, height_src_image, ifm_src_img);
  93     src.allocator()->init(TensorInfo(src_shape, 1, DataType::F32));
  94
  95     // Initialize tensors of conv0 (5x5 kernels, 8 output feature maps; the output keeps the width and height of src)
  96     constexpr unsigned int kernel_x_conv0 = 5;
  97     constexpr unsigned int kernel_y_conv0 = 5;
  98     constexpr unsigned int ofm_conv0      = 8;
  99
 100     const TensorShape weights_shape_conv0(kernel_x_conv0, kernel_y_conv0, src_shape.z(), ofm_conv0);
 101     const TensorShape biases_shape_conv0(weights_shape_conv0[3]);
 102     const TensorShape out_shape_conv0(src_shape.x(), src_shape.y(), weights_shape_conv0[3]);
 103
 104     weights0.allocator()->init(TensorInfo(weights_shape_conv0, 1, DataType::F32));
 105     biases0.allocator()->init(TensorInfo(biases_shape_conv0, 1, DataType::F32));
 106     out_conv0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));
 107
 108     // Initialize tensor of act0 (same shape as the conv0 output)
 109     out_act0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));
 110
 111     // Initialize tensor of pool0 (conv0 output shape with width and height halved)
 112     TensorShape out_shape_pool0 = out_shape_conv0;
 113     out_shape_pool0.set(0, out_shape_pool0.x() / 2);
 114     out_shape_pool0.set(1, out_shape_pool0.y() / 2);
 115     out_pool0.allocator()->init(TensorInfo(out_shape_pool0, 1, DataType::F32));
 116
 117     // Initialize tensors of conv1 (3x3 kernels, 16 output feature maps; the output keeps the width and height of the pool0 output)
 118     constexpr unsigned int kernel_x_conv1 = 3;
 119     constexpr unsigned int kernel_y_conv1 = 3;
 120     constexpr unsigned int ofm_conv1      = 16;
 121
 122     const TensorShape weights_shape_conv1(kernel_x_conv1, kernel_y_conv1, out_shape_pool0.z(), ofm_conv1);
 123
 124     const TensorShape biases_shape_conv1(weights_shape_conv1[3]);
 125     const TensorShape out_shape_conv1(out_shape_pool0.x(), out_shape_pool0.y(), weights_shape_conv1[3]);
 126
 127     weights1.allocator()->init(TensorInfo(weights_shape_conv1, 1, DataType::F32));
 128     biases1.allocator()->init(TensorInfo(biases_shape_conv1, 1, DataType::F32));
 129     out_conv1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32));
 130
 131     // Initialize tensor of act1 (same shape as the conv1 output)
 132     out_act1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32));
 133
 134     // Initialize tensor of pool1 (conv1 output shape with width and height halved)
 135     TensorShape out_shape_pool1 = out_shape_conv1;
 136     out_shape_pool1.set(0, out_shape_pool1.x() / 2);
 137     out_shape_pool1.set(1, out_shape_pool1.y() / 2);
 138     out_pool1.allocator()->init(TensorInfo(out_shape_pool1, 1, DataType::F32));
 139
 140     // Initialize tensors of fc0 (the first weights dimension is x * y * z of the pool1 output shape, the second is num_labels)
 141     constexpr unsigned int num_labels = 128;
 142
 143     const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), num_labels);
 144     const TensorShape biases_shape_fc0(num_labels);
 145     const TensorShape out_shape_fc0(num_labels);
 146
 147     weights2.allocator()->init(TensorInfo(weights_shape_fc0, 1, DataType::F32));
 148     biases2.allocator()->init(TensorInfo(biases_shape_fc0, 1, DataType::F32));
 149     out_fc0.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32));
 150
 151     // Initialize tensor of act2 (same shape as the fc0 output)
 152     out_act2.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32));
 153
 154     // Initialize tensor of softmax (same length as the fc0 output)
 155     const TensorShape out_shape_softmax(out_shape_fc0.x());
 156     out_softmax.allocator()->init(TensorInfo(out_shape_softmax, 1, DataType::F32));
 157
 158     /* -----------------------End: [Initialize tensors] */
 159
 160     /* [Configure functions] */
 161
 162     // in:32x32x1: 5x5 convolution, 8 output feature maps (OFM)
 163     conv0.configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */));
 164
 165     // in:32x32x8, out:32x32x8, Activation function: relu
 166     act0.configure(&out_conv0, &out_act0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 167
 168     // in:32x32x8, out:16x16x8 (2x2 pooling), Pool type function: Max
 169     pool0.configure(&out_act0, &out_pool0, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */)));
 170
 171     // in:16x16x8: 3x3 convolution, 16 output feature maps (OFM)
 172     conv1.configure(&out_pool0, &weights1, &biases1, &out_conv1, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */));
 173
 174     // in:16x16x16, out:16x16x16, Activation function: relu
 175     act1.configure(&out_conv1, &out_act1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 176
 177     // in:16x16x16, out:8x8x16 (2x2 pooling), Pool type function: Average
 178     pool1.configure(&out_act1, &out_pool1, PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */)));
 179
 180     // in:8x8x16, out:128
 181     fc0.configure(&out_pool1, &weights2, &biases2, &out_fc0);
 182
 183     // in:128, out:128, Activation function: relu
 184     act2.configure(&out_fc0, &out_act2, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 185
 186     // in:128, out:128
 187     softmax.configure(&out_act2, &out_softmax);
 188
 189     /* -----------------------End: [Configure functions] */
 190
 191     /*[ Add tensors to memory manager ]*/
 192
 193     // We need 2 memory groups for handling the input and output
 194     // We explicitly call allocate() right after manage() in order to avoid overlapping lifetimes
 195     MemoryGroup memory_group0(mm_transitions);
 196     MemoryGroup memory_group1(mm_transitions);
 197     // Consecutive layer outputs alternate between the two groups, so the input and the output of a layer are never in the same group
 198     memory_group0.manage(&out_conv0);
 199     out_conv0.allocator()->allocate();
 200     memory_group1.manage(&out_act0);
 201     out_act0.allocator()->allocate();
 202     memory_group0.manage(&out_pool0);
 203     out_pool0.allocator()->allocate();
 204     memory_group1.manage(&out_conv1);
 205     out_conv1.allocator()->allocate();
 206     memory_group0.manage(&out_act1);
 207     out_act1.allocator()->allocate();
 208     memory_group1.manage(&out_pool1);
 209     out_pool1.allocator()->allocate();
 210     memory_group0.manage(&out_fc0);
 211     out_fc0.allocator()->allocate();
 212     memory_group1.manage(&out_act2);
 213     out_act2.allocator()->allocate();
 214     memory_group0.manage(&out_softmax);
 215     out_softmax.allocator()->allocate();
 216
 217     /* -----------------------End: [ Add tensors to memory manager ] */
 218
 219     /* [Allocate tensors] */
 220
 221     // Now that the padding requirements are known we can allocate the remaining tensors (src, weights and biases are not registered with a memory group)
 222     src.allocator()->allocate();
 223     weights0.allocator()->allocate();
 224     weights1.allocator()->allocate();
 225     weights2.allocator()->allocate();
 226     biases0.allocator()->allocate();
 227     biases1.allocator()->allocate();
 228     biases2.allocator()->allocate();
 229
 230     /* -----------------------End: [Allocate tensors] */
 231
 232     // Finalize layers memory manager
 233
 234     // Set allocator that the memory manager will use
 235     mm_layers->set_allocator(&allocator);
 236
 237     // Number of pools that the manager will create. This specifies how many layers you want to run in parallel
 238     mm_layers->set_num_pools(1);
 239
 240     // Finalize the manager. (Validity checks, memory allocations etc)
 241     mm_layers->finalize();
 242
 243     // Finalize transitions memory manager
 244
 245     // Set allocator that the memory manager will use
 246     mm_transitions->set_allocator(&allocator);
 247
 248     // Number of pools that the manager will create. This specifies how many models we can run in parallel.
 249     // Setting to 2 as we need one for the input and one for the output at any given time
 250     mm_transitions->set_num_pools(2);
 251
 252     // Finalize the manager. (Validity checks, memory allocations etc)
 253     mm_transitions->finalize();
 254
 255     /* [Initialize weights and biases tensors] */
 256
 257     // Once the tensors have been allocated, the src, weights and biases tensors can be initialized
 258     // ... (left out of this example: nothing is written to src, the weights or the biases before the layers run)
 259
 260     /* -----------------------End: [Initialize weights and biases tensors] */
 261
 262     /* [Execute the functions] */
 263
 264     // Acquire memory for the memory groups
 265     memory_group0.acquire();
 266     memory_group1.acquire();
 267     // Run the layers once, in network order
 268     conv0.run();
 269     act0.run();
 270     pool0.run();
 271     conv1.run();
 272     act1.run();
 273     pool1.run();
 274     fc0.run();
 275     act2.run();
 276     softmax.run();
 277
 278     // Release the memory acquired for the memory groups
 279     memory_group0.release();
 280     memory_group1.release();
 281
 282     /* -----------------------End: [Execute the functions] */
 283 }
 284
 285 /** Entry point of the CNN example
 286  *
 287  * Forwards the command line to utils::run_example together with main_cnn, which sets up and runs the following network:
 288  *
 289  * Input -> conv0:5x5 -> act0:relu -> pool0:2x2 (max) -> conv1:3x3 -> act1:relu -> pool1:2x2 (average) -> fc0 -> act2:relu -> softmax
 290  *
 291  * @param[in] argc Number of arguments
 292  * @param[in] argv Arguments
 293  *
 294  * @return The value returned by utils::run_example
 295  */
 296 int main(int argc, const char **argv)
 297 {
 298     const auto status = utils::run_example(argc, argv, main_cnn);
 299     return status;
 300 }