Imported Upstream version 1.4.0
[platform/core/ml/nnfw.git] / runtime / contrib / pure_arm_compute / src / execution.cc
1 /*
2  * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
#include <NeuralNetworks.h>

#include "compilation.h"
#include "execution.h"
#include "profiling/profiling.h"
#include "profiling/profiler.h"
#include "event.h"

#include "internal/VectorSource.h"
#include "internal/MatrixSource.h"
#include "internal/Tensor3DSource.h"
#include "internal/FeatureSource.h"
#include "internal/TensorSource.h"

#include "internal/Sinks.h"
#include "internal/VectorSink.h"
#include "internal/MatrixSink.h"
#include "internal/Tensor3DSink.h"
#include "internal/FeatureSink.h"

#include "misc/feature/IndexIterator.h"

#include <arm_compute/runtime/CL/CLScheduler.h>

#include <cassert>
#include <new>
42
43 static void asVectorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
44                            int32_t len, const void *buffer, size_t length)
45 {
46   switch (type)
47   {
48     case ANEURALNETWORKS_FLOAT32:
49     case ANEURALNETWORKS_TENSOR_FLOAT32:
50       execution->source<VectorSource<float>>(index, len, reinterpret_cast<const float *>(buffer),
51                                              length);
52       break;
53     case ANEURALNETWORKS_INT32:
54     case ANEURALNETWORKS_TENSOR_INT32:
55       execution->source<VectorSource<int32_t>>(index, len,
56                                                reinterpret_cast<const int32_t *>(buffer), length);
57       break;
58     case ANEURALNETWORKS_UINT32:
59       execution->source<VectorSource<uint32_t>>(index, len,
60                                                 reinterpret_cast<const uint32_t *>(buffer), length);
61       break;
62     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
63       execution->source<VectorSource<uint8_t>>(index, len,
64                                                reinterpret_cast<const uint8_t *>(buffer), length);
65       break;
66     default:
67       throw std::runtime_error("Not supported, yet");
68       break;
69   }
70 }
71
72 static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
73                            const nnfw::misc::matrix::Shape &shape, const void *buffer,
74                            size_t length)
75 {
76   switch (type)
77   {
78     case ANEURALNETWORKS_FLOAT32:
79     case ANEURALNETWORKS_TENSOR_FLOAT32:
80       execution->source<MatrixSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
81                                              length);
82       break;
83     case ANEURALNETWORKS_INT32:
84     case ANEURALNETWORKS_TENSOR_INT32:
85       execution->source<MatrixSource<int32_t>>(index, shape,
86                                                reinterpret_cast<const int32_t *>(buffer), length);
87       break;
88     case ANEURALNETWORKS_UINT32:
89       execution->source<MatrixSource<uint32_t>>(index, shape,
90                                                 reinterpret_cast<const uint32_t *>(buffer), length);
91       break;
92     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
93       execution->source<MatrixSource<uint8_t>>(index, shape,
94                                                reinterpret_cast<const uint8_t *>(buffer), length);
95       break;
96     default:
97       throw std::runtime_error("Not supported, yet");
98       break;
99   }
100 }
101
102 static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
103                              const nnfw::misc::tensor::Shape &shape, const void *buffer,
104                              size_t length)
105 {
106   switch (type)
107   {
108     case ANEURALNETWORKS_FLOAT32:
109     case ANEURALNETWORKS_TENSOR_FLOAT32:
110       execution->source<Tensor3DSource<float>>(index, shape,
111                                                reinterpret_cast<const float *>(buffer), length);
112       break;
113     case ANEURALNETWORKS_INT32:
114     case ANEURALNETWORKS_TENSOR_INT32:
115       execution->source<Tensor3DSource<int32_t>>(index, shape,
116                                                  reinterpret_cast<const int32_t *>(buffer), length);
117       break;
118     case ANEURALNETWORKS_UINT32:
119       execution->source<Tensor3DSource<uint32_t>>(
120           index, shape, reinterpret_cast<const uint32_t *>(buffer), length);
121       break;
122     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
123       execution->source<Tensor3DSource<uint8_t>>(index, shape,
124                                                  reinterpret_cast<const uint8_t *>(buffer), length);
125       break;
126     default:
127       throw std::runtime_error("Not supported, yet");
128       break;
129   }
130 }
131
132 static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
133                            const nnfw::misc::tensor::Shape &shape, const void *buffer,
134                            size_t length)
135 {
136   switch (type)
137   {
138     case ANEURALNETWORKS_FLOAT32:
139     case ANEURALNETWORKS_TENSOR_FLOAT32:
140       execution->source<TensorSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
141                                              length);
142       break;
143     case ANEURALNETWORKS_INT32:
144     case ANEURALNETWORKS_TENSOR_INT32:
145       execution->source<TensorSource<int32_t>>(index, shape,
146                                                reinterpret_cast<const int32_t *>(buffer), length);
147       break;
148     case ANEURALNETWORKS_UINT32:
149       execution->source<TensorSource<uint32_t>>(index, shape,
150                                                 reinterpret_cast<const uint32_t *>(buffer), length);
151       break;
152     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
153       execution->source<TensorSource<uint8_t>>(index, shape,
154                                                reinterpret_cast<const uint8_t *>(buffer), length);
155       break;
156     default:
157       throw std::runtime_error("Not supported, yet");
158       break;
159   }
160 }
161
162 static void asFeatureSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
163                             const nnfw::misc::feature::Shape &shape, const void *buffer,
164                             size_t length)
165 {
166   switch (type)
167   {
168     case ANEURALNETWORKS_FLOAT32:
169     case ANEURALNETWORKS_TENSOR_FLOAT32:
170       execution->source<FeatureSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
171                                               length);
172       break;
173     case ANEURALNETWORKS_INT32:
174     case ANEURALNETWORKS_TENSOR_INT32:
175       execution->source<FeatureSource<int32_t>>(index, shape,
176                                                 reinterpret_cast<const int32_t *>(buffer), length);
177       break;
178     case ANEURALNETWORKS_UINT32:
179       execution->source<FeatureSource<uint32_t>>(
180           index, shape, reinterpret_cast<const uint32_t *>(buffer), length);
181       break;
182     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
183       execution->source<FeatureSource<uint8_t>>(index, shape,
184                                                 reinterpret_cast<const uint8_t *>(buffer), length);
185       break;
186     default:
187       throw std::runtime_error("Not supported, yet");
188       break;
189   }
190 }
191
192 static void asVectorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
193                          int32_t len, void *buffer, size_t length)
194 {
195   switch (type)
196   {
197     case ANEURALNETWORKS_FLOAT32:
198     case ANEURALNETWORKS_TENSOR_FLOAT32:
199       execution->sink<VectorSink<float>>(index, len, reinterpret_cast<float *>(buffer), length);
200       break;
201     case ANEURALNETWORKS_INT32:
202     case ANEURALNETWORKS_TENSOR_INT32:
203       execution->sink<VectorSink<int32_t>>(index, len, reinterpret_cast<int32_t *>(buffer), length);
204       break;
205     case ANEURALNETWORKS_UINT32:
206       execution->sink<VectorSink<uint32_t>>(index, len, reinterpret_cast<uint32_t *>(buffer),
207                                             length);
208       break;
209     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
210       execution->sink<VectorSink<uint8_t>>(index, len, reinterpret_cast<uint8_t *>(buffer), length);
211       break;
212     default:
213       throw std::runtime_error("Not supported, yet");
214       break;
215   }
216 }
217
218 static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
219                          int32_t H, int32_t W, void *buffer, size_t length)
220 {
221   switch (type)
222   {
223     case ANEURALNETWORKS_FLOAT32:
224     case ANEURALNETWORKS_TENSOR_FLOAT32:
225       execution->sink<MatrixSink<float>>(index, H, W, reinterpret_cast<float *>(buffer), length);
226       break;
227     case ANEURALNETWORKS_INT32:
228     case ANEURALNETWORKS_TENSOR_INT32:
229       execution->sink<MatrixSink<int32_t>>(index, H, W, reinterpret_cast<int32_t *>(buffer),
230                                            length);
231       break;
232     case ANEURALNETWORKS_UINT32:
233       execution->sink<MatrixSink<uint32_t>>(index, H, W, reinterpret_cast<uint32_t *>(buffer),
234                                             length);
235       break;
236     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
237       execution->sink<MatrixSink<uint8_t>>(index, H, W, reinterpret_cast<uint8_t *>(buffer),
238                                            length);
239       break;
240     default:
241       throw std::runtime_error("Not supported, yet");
242       break;
243   }
244 }
245
246 static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
247                           const nnfw::misc::feature::Shape &shape, void *buffer, size_t length)
248 {
249   switch (type)
250   {
251     case ANEURALNETWORKS_FLOAT32:
252     case ANEURALNETWORKS_TENSOR_FLOAT32:
253       execution->sink<FeatureSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
254       break;
255     case ANEURALNETWORKS_INT32:
256     case ANEURALNETWORKS_TENSOR_INT32:
257       execution->sink<FeatureSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
258                                             length);
259       break;
260     case ANEURALNETWORKS_UINT32:
261       execution->sink<FeatureSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
262                                              length);
263       break;
264     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
265       execution->sink<FeatureSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
266                                             length);
267       break;
268     default:
269       throw std::runtime_error("Not supported, yet");
270       break;
271   }
272 }
273
274 static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
275                            const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
276 {
277   assert(shape.rank() == 3);
278
279   switch (type)
280   {
281     case ANEURALNETWORKS_FLOAT32:
282     case ANEURALNETWORKS_TENSOR_FLOAT32:
283       execution->sink<Tensor3DSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
284       break;
285     case ANEURALNETWORKS_INT32:
286     case ANEURALNETWORKS_TENSOR_INT32:
287       execution->sink<Tensor3DSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
288                                              length);
289       break;
290     case ANEURALNETWORKS_UINT32:
291       execution->sink<Tensor3DSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
292                                               length);
293       break;
294     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
295       execution->sink<Tensor3DSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
296                                              length);
297       break;
298     default:
299       throw std::runtime_error("Not supported, yet");
300       break;
301   }
302 }
303
304 static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
305                          const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
306 {
307   switch (type)
308   {
309     case ANEURALNETWORKS_FLOAT32:
310     case ANEURALNETWORKS_TENSOR_FLOAT32:
311       execution->sink<TensorSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
312       break;
313     case ANEURALNETWORKS_INT32:
314     case ANEURALNETWORKS_TENSOR_INT32:
315       execution->sink<TensorSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
316                                            length);
317       break;
318     case ANEURALNETWORKS_UINT32:
319       execution->sink<TensorSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
320                                             length);
321       break;
322     case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
323       execution->sink<TensorSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
324                                            length);
325       break;
326     default:
327       throw std::runtime_error("Not supported, yet");
328       break;
329   }
330 }
331
332 //
333 // NNAPI Implementation
334 //
335 int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
336                                     ANeuralNetworksExecution **execution)
337 {
338   if ((compilation == nullptr) || (execution == nullptr))
339   {
340     return ANEURALNETWORKS_UNEXPECTED_NULL;
341   }
342
343   std::shared_ptr<const ::internal::arm_compute::Plan> plan;
344   compilation->publish(plan);
345   ANeuralNetworksExecution *execution_ptr = new ANeuralNetworksExecution{plan};
346   if (execution_ptr == nullptr)
347   {
348     return ANEURALNETWORKS_OUT_OF_MEMORY;
349   }
350   *execution = execution_ptr;
351
352   return ANEURALNETWORKS_NO_ERROR;
353 }
354
355 int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
356                                       const ANeuralNetworksOperandType *type, const void *buffer,
357                                       size_t length)
358 {
359   // Don't check type
360   // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
361   //  If the input or output is optional and omitted then it need not have a fully specified tensor
362   //  operand type
363   if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
364   {
365     return ANEURALNETWORKS_UNEXPECTED_NULL;
366   }
367
368   const auto &operands = execution->plan().model().operands();
369
370   // TODO Check type conflicts
371
372   // NOTE The current implemenation assumes that every input is a feature map.
373   // TODO Remove this assumption
374   const auto operand_index = execution->plan().model().inputs.at(index);
375   int32_t input_type = operands.at(operand_index).type();
376   // NOTE TFLite passes type parameter unconditionally as nullptr.
377   // Is it necessary to reget type value already set in model step?
378   if (type != nullptr)
379   {
380     input_type = type->type;
381   }
382
383   auto shape = operands.at(operand_index).shape();
384   auto rank = shape.rank();
385
386   if (rank == 1)
387   {
388     const auto len = shape.dim(0);
389
390     asVectorSource(execution, input_type, index, len, buffer, length);
391   }
392   else if (rank == 2)
393   {
394     const auto &operand_shape = shape.asMatrix();
395
396     asMatrixSource(execution, input_type, index, operand_shape, buffer, length);
397   }
398   else if (rank == 3)
399   {
400     const auto &operand_shape = shape.asTensor();
401
402     asTensor3DSource(execution, input_type, index, operand_shape, buffer, length);
403   }
404   else if (rank == 4)
405   {
406     const auto &operand_shape = shape.asFeature();
407
408     asFeatureSource(execution, input_type, index, operand_shape, buffer, length);
409   }
410   else
411   {
412     // NOTE TensorSource is much slower than specialized Source(s)
413     const auto &operand_shape = shape.asTensor();
414
415     asTensorSource(execution, input_type, index, operand_shape, buffer, length);
416   }
417
418   return ANEURALNETWORKS_NO_ERROR;
419 }
420
421 // squeeze(shape) eliminates all the dimensions whose dimensionality is 1
422 // For example, squeeze([3, 1, 3]) returns [3, 3]
423 static nnfw::misc::tensor::Shape squeeze(const nnfw::misc::tensor::Shape &shape)
424 {
425   nnfw::misc::tensor::Shape res(0);
426
427   for (uint32_t axis = 0; axis < shape.rank(); ++axis)
428   {
429     if (shape.dim(axis) != 1)
430     {
431       res.append(shape.dim(axis));
432     }
433   }
434
435   return res;
436 }
437
438 int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
439                                        const ANeuralNetworksOperandType *type, void *buffer,
440                                        size_t length)
441 {
442   // Don't check type
443   // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
444   //  If the input or output is optional and omitted then it need not have a fully specified tensor
445   //  operand type
446   if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
447   {
448     return ANEURALNETWORKS_UNEXPECTED_NULL;
449   }
450
451   const auto &operands = execution->plan().model().operands();
452
453   // TODO Check type conflicts
454
455   const auto operand_index = execution->plan().model().outputs.at(index);
456   int32_t output_type = operands.at(operand_index).type();
457   const auto &output_shape = operands.at(operand_index).shape();
458
459   if (output_shape.rank() == 1)
460   {
461     const auto len = output_shape.dim(0);
462
463     asVectorSink(execution, output_type, index, len, buffer, length);
464   }
465   else if (output_shape.rank() == 2)
466   {
467     const auto H = output_shape.dim(0);
468     const auto W = output_shape.dim(1);
469
470     asMatrixSink(execution, output_type, index, H, W, buffer, length);
471   }
472   else if (output_shape.rank() == 3)
473   {
474     asTensor3DSink(execution, output_type, index, output_shape, buffer, length);
475   }
476   else if ((output_shape.rank() == 4))
477   {
478     const auto &operand_shape = operands.at(operand_index).shape().asFeature();
479
480     asFeatureSink(execution, output_type, index, operand_shape, buffer, length);
481   }
482   else
483   {
484     // NOTE TensorSink is much slower than specialized Sink(s)
485     const auto &shape = operands.at(operand_index).shape();
486     asTensorSink(execution, output_type, index, shape, buffer, length);
487   }
488
489   return ANEURALNETWORKS_NO_ERROR;
490 }
491
492 int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
493                                           ANeuralNetworksEvent **event)
494 {
495   if ((execution == nullptr) || (event == nullptr))
496   {
497     return ANEURALNETWORKS_UNEXPECTED_NULL;
498   }
499
500   // TODO: Handle event
501   ANeuralNetworksEvent *event_ptr = new ANeuralNetworksEvent{};
502   if (event_ptr == nullptr)
503   {
504     return ANEURALNETWORKS_OUT_OF_MEMORY;
505   }
506   *event = event_ptr;
507
508   return ANeuralNetworksExecution_compute(execution);
509 }
510
// Runs the execution synchronously: pushes each input buffer into its planned
// ACL tensor, runs every scheduled operation in order, then pulls each output
// tensor back into its sink buffer.
int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
{
  if (execution == nullptr)
  {
    return ANEURALNETWORKS_UNEXPECTED_NULL;
  }

  // When the profiling context requests it, wait for the CL queue after every
  // operation so that per-operation timings are meaningful.
  const bool sync = profiling::Context::get().sync();
  const auto &plan = execution->plan();
  const auto &model = plan.model();

  // Set input(s)
  for (uint32_t n = 0; n < model.inputs.size(); ++n)
  {
    // Copies the n-th registered source into the operand's backing tensor.
    auto setter = [&](::arm_compute::ITensor &tensor) { execution->source(n).push(tensor); };

    // Some operand may not be defined at plan. Because some operands
    // may be useless at ACL (ex. shape tensor for Reshape operator)
    // So added a sanity check.
    if (plan.operands().exist(model.inputs.at(n)))
    {
      plan.operands().at(model.inputs.at(n)).access(setter);
    }
  }

  const auto &operations = execution->plan().operations();

  for (uint32_t n = 0; n < operations.size(); ++n)
  {
    // Scoped profile covers exactly one operation's run() call.
    auto prof = profiling::Context::get().getProfiler();
    SCOPED_OPERATOR_PROFILE(prof, operations.at(n).op_idx());
    operations.at(n).run();

    if (sync)
    {
      arm_compute::CLScheduler::get().sync();
    }
  }

  // Get output(s)
  for (uint32_t n = 0; n < model.outputs.size(); ++n)
  {
    // Copies the operand's backing tensor into the n-th registered sink.
    auto getter = [&](::arm_compute::ITensor &tensor) { execution->sink(n).pull(tensor); };

    // NOTE Unlike inputs, outputs are accessed unconditionally — presumably every
    //      model output is expected to exist in the plan. TODO confirm.
    plan.operands().at(model.outputs.at(n)).access(getter);
  }

  return ANEURALNETWORKS_NO_ERROR;
}
560
561 void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution) {}
562
563 // TODO: implement this. added to fix link error on test build.
564 int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
565                                                 const ANeuralNetworksOperandType *type,
566                                                 const ANeuralNetworksMemory *memory, size_t offset,
567                                                 size_t length)
568 {
569   if ((execution == nullptr) || (memory == nullptr))
570   {
571     return ANEURALNETWORKS_UNEXPECTED_NULL;
572   }
573
574   assert(false);
575   return -1;
576 }
577
578 // TODO: implement this. added to fix link error on test build.
579 int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
580                                                  const ANeuralNetworksOperandType *type,
581                                                  const ANeuralNetworksMemory *memory, size_t offset,
582                                                  size_t length)
583 {
584   if ((execution == nullptr) || (memory == nullptr))
585   {
586     return ANEURALNETWORKS_UNEXPECTED_NULL;
587   }
588
589   assert(false);
590   return -1;
591 }
592
593 int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
594                                                   int32_t index, uint32_t *rank)
595 {
596   if ((execution == nullptr) || (rank == nullptr))
597   {
598     return ANEURALNETWORKS_UNEXPECTED_NULL;
599   }
600
601   const auto &operands = execution->plan().model().operands();
602   const auto operand_index = execution->plan().model().outputs.at(index);
603   const auto &output_shape = operands.at(operand_index).shape();
604
605   *rank = output_shape.rank();
606
607   return ANEURALNETWORKS_NO_ERROR;
608 }
609
610 int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
611                                                         int32_t index, uint32_t *dimensions)
612 {
613   if ((execution == nullptr) || (dimensions == nullptr))
614   {
615     return ANEURALNETWORKS_UNEXPECTED_NULL;
616   }
617
618   const auto &operands = execution->plan().model().operands();
619   const auto operand_index = execution->plan().model().outputs.at(index);
620   const auto &output_shape = operands.at(operand_index).shape();
621
622   for (uint32_t axis = 0; axis < output_shape.rank(); ++axis)
623   {
624     dimensions[axis] = static_cast<uint32_t>(output_shape.dim(axis));
625   }
626
627   return ANEURALNETWORKS_NO_ERROR;
628 }