Add skeleton of CpuExecutor (#136)
author Hyung-Kyu Choi / Motion Control Lab (SR) / Senior Engineer / Samsung Electronics <hk0110.choi@samsung.com>
Thu, 22 Mar 2018 09:32:02 +0000 (18:32 +0900)
committer GitHub Enterprise <noreply-CODE@samsung.com>
Thu, 22 Mar 2018 09:32:02 +0000 (18:32 +0900)
* Add skeleton of CpuExecutor

- Copy CpuExecutor.h and CpuExecutor.cpp from Android NN
- Comment out the parts of the implementation that we do not support yet (see the guard sketch below)

Signed-off-by: Hyung-Kyu Choi <hk0110.choi@samsung.com>
* Apply new comment style for reference NN runtime

- Mark code disabled for the reference NN runtime with the // REF-ANN comment style

Signed-off-by: Hyung-Kyu Choi <hk0110.choi@samsung.com>
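
Both changes come down to one convention: code copied from Android NN that the reference runtime cannot support yet stays in the tree verbatim but is compiled out behind an #if 0 guard tagged // REF-ANN, so every disabled spot is easy to find and re-enable later. A minimal, hypothetical sketch of the pattern (illustrative only, not part of the diff; the function name below is made up):

    // Dependencies the reference NN runtime does not provide yet are
    // guarded out rather than deleted.
    #if 0 // REF-ANN
    #include "Operations.h"
    #endif

    // Whole functions that rely on those dependencies are guarded the
    // same way, with the REF-ANN tag marking each disabled block.
    #if 0 // REF-ANN
    static bool mapHidlMemoryPools() {  // hypothetical name, for illustration
        return false;
    }
    #endif
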
src/runtime/ref/nn/CMakeLists.txt
src/runtime/ref/nn/common/CMakeLists.txt [new file with mode: 0644]
src/runtime/ref/nn/common/CpuExecutor.cpp [new file with mode: 0644]
src/runtime/ref/nn/common/include/CpuExecutor.h [new file with mode: 0644]
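
For orientation, here is a rough, hypothetical sketch of how the copied entry points are meant to be driven once the guarded implementation is enabled: setRunTimePoolInfosFromHidlMemories() maps the memory pools, and CpuExecutor::run() executes the serialized operations. The runOnCpu() wrapper and its error handling are assumptions, not part of this commit, and the call would not do anything yet while the bodies sit behind #if 0 // REF-ANN.

    // Hypothetical caller, for illustration only.
    #include <vector>

    #include "CpuExecutor.h"
    #include "NeuralNetworks.h"

    int runOnCpu(const android::nn::Model& model,
                 const android::nn::Request& request) {
        using namespace android::nn;

        // Map every memory pool referenced by the model and by the request.
        std::vector<RunTimePoolInfo> modelPools;
        std::vector<RunTimePoolInfo> requestPools;
        if (!setRunTimePoolInfosFromHidlMemories(&modelPools, model.pools) ||
            !setRunTimePoolInfosFromHidlMemories(&requestPools, request.pools)) {
            return ANEURALNETWORKS_OP_FAILED;  // NNAPI result code from NeuralNetworks.h
        }

        // Run the operations in the order they were serialized in the model.
        CpuExecutor executor;
        return executor.run(model, request, modelPools, requestPools);
    }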

diff --git a/src/runtime/ref/nn/CMakeLists.txt b/src/runtime/ref/nn/CMakeLists.txt
index 4c5f637..127d8a1 100644 (file)
@@ -1 +1,2 @@
 add_subdirectory(runtime)
+add_subdirectory(common)
diff --git a/src/runtime/ref/nn/common/CMakeLists.txt b/src/runtime/ref/nn/common/CMakeLists.txt
new file mode 100644 (file)
index 0000000..a01f3b5
--- /dev/null
@@ -0,0 +1,5 @@
+# Library `runtime_ref_common`
+SET (RUNTIME_SRCS CpuExecutor.cpp)
+
+add_library(runtime_ref_common SHARED ${RUNTIME_SRCS})
+target_include_directories(runtime_ref_common PRIVATE . include ../runtime/include)
diff --git a/src/runtime/ref/nn/common/CpuExecutor.cpp b/src/runtime/ref/nn/common/CpuExecutor.cpp
new file mode 100644 (file)
index 0000000..a8502c1
--- /dev/null
@@ -0,0 +1,1264 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "CpuExecutor"
+
+#include "CpuExecutor.h"
+
+#include "NeuralNetworks.h"
+#if 0 // REF-ANN
+#include "Operations.h"
+#endif
+
+#include <sys/mman.h>
+
+namespace android {
+namespace nn {
+
+#if 0 // REF-ANN
+// TODO: short term, make shared memory mapping and updating a utility function.
+// TODO: long term, implement mmap_fd as a hidl IMemory service.
+bool RunTimePoolInfo::set(const hidl_memory& hidlMemory) {
+    this->hidlMemory = hidlMemory;
+    auto memType = hidlMemory.name();
+    if (memType == "ashmem") {
+        memory = mapMemory(hidlMemory);
+        if (memory == nullptr) {
+            LOG(ERROR) << "Can't map shared memory.";
+            return false;
+        }
+        memory->update();
+        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
+        if (buffer == nullptr) {
+            LOG(ERROR) << "Can't access shared memory.";
+            return false;
+        }
+        return true;
+    } else if (memType == "mmap_fd") {
+        size_t size = hidlMemory.size();
+        int fd = hidlMemory.handle()->data[0];
+        int prot = hidlMemory.handle()->data[1];
+        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
+                                        hidlMemory.handle()->data[3]);
+        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
+        if (buffer == MAP_FAILED) {
+            LOG(ERROR) << "Can't mmap the file descriptor.";
+            return false;
+        }
+        return true;
+    } else {
+        LOG(ERROR) << "unsupported hidl_memory type";
+        return false;
+    }
+}
+
+// Making sure the output data are correctly updated after execution.
+bool RunTimePoolInfo::update() {
+    auto memType = hidlMemory.name();
+    if (memType == "ashmem") {
+        memory->commit();
+        return true;
+    } else if (memType == "mmap_fd") {
+        int prot = hidlMemory.handle()->data[1];
+        if (prot & PROT_WRITE) {
+            size_t size = hidlMemory.size();
+            return msync(buffer, size, MS_SYNC) == 0;
+        }
+    }
+    // No-op for other types of memory.
+    return true;
+}
+#endif
+
+#if 0 // REF-ANN
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+                                         const hidl_vec<hidl_memory>& pools) {
+    poolInfos->resize(pools.size());
+    for (size_t i = 0; i < pools.size(); i++) {
+        auto& poolInfo = (*poolInfos)[i];
+        if (!poolInfo.set(pools[i])) {
+            LOG(ERROR) << "Could not map pool";
+            return false;
+        }
+    }
+    return true;
+}
+
+// Updates the RunTimeOperandInfo with the newly calculated shape.
+// Allocate the buffer if we need to.
+static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
+    // For user-provided model output operands, the parameters must match the Shape
+    // calculated from the preparation step.
+    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
+        if (info->type != shape.type ||
+            info->dimensions != shape.dimensions) {
+            LOG(ERROR) << "Invalid type or dimensions for model output";
+            return false;
+        }
+        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
+            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
+            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
+            return false;
+        }
+    }
+    info->type = shape.type;
+    info->dimensions = shape.dimensions;
+    info->scale = shape.scale;
+    info->zeroPoint = shape.offset;
+    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
+        uint32_t length = sizeOfData(info->type, info->dimensions);
+        info->buffer = new uint8_t[length];
+        if (info->buffer == nullptr) {
+            return false;
+        }
+    }
+    return true;
+}
+#endif
+
+#if 0 // REF-ANN
+// Ignore the .pools entry in model and request.  This will have been taken care of
+// by the caller.
+int CpuExecutor::run(const Model& model, const Request& request,
+                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
+    VLOG(CPUEXE) << "CpuExecutor::run()";
+    // VLOG(CPUEXE) << "model: " << toString(model);
+    VLOG(CPUEXE) << "request: " << toString(request);
+
+    mModel = &model;
+    mRequest = &request; // TODO check if mRequest is needed
+    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
+    // The model has serialized the operations in execution order.
+    for (const auto& operation : model.operations) {
+        int n = executeOperation(operation);
+        if (n != ANEURALNETWORKS_NO_ERROR) {
+            return n;
+        }
+    }
+    for (auto runtimeInfo : modelPoolInfos) {
+        runtimeInfo.update();
+    }
+    for (auto runtimeInfo : requestPoolInfos) {
+        runtimeInfo.update();
+    }
+    mModel = nullptr;
+    mRequest = nullptr;
+    VLOG(CPUEXE) << "Completed run normally";
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
+    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
+    const size_t count = mModel->operands.size();
+    mOperands.resize(count);
+
+    // Start by setting the runtime info to what's in the model.
+    for (size_t i = 0; i < count; i++) {
+        const Operand& from = mModel->operands[i];
+        RunTimeOperandInfo& to = mOperands[i];
+        to.type = from.type;
+        to.dimensions = from.dimensions;
+        to.scale = from.scale;
+        to.zeroPoint = from.zeroPoint;
+        to.length = from.location.length;
+        to.lifetime = from.lifetime;
+        switch (from.lifetime) {
+            case OperandLifeTime::TEMPORARY_VARIABLE:
+                to.buffer = nullptr;
+                to.numberOfUsesLeft = from.numberOfConsumers;
+                break;
+            case OperandLifeTime::CONSTANT_COPY:
+                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
+                to.numberOfUsesLeft = 0;
+                break;
+            case OperandLifeTime::CONSTANT_REFERENCE: {
+                auto poolIndex = from.location.poolIndex;
+                nnAssert(poolIndex < modelPoolInfos.size());
+                auto& r = modelPoolInfos[poolIndex];
+                to.buffer = r.buffer + from.location.offset;
+                to.numberOfUsesLeft = 0;
+                break;
+            }
+            case OperandLifeTime::MODEL_INPUT:
+            case OperandLifeTime::MODEL_OUTPUT:
+            case OperandLifeTime::NO_VALUE:
+                to.buffer = nullptr;
+                to.numberOfUsesLeft = 0;
+                break;
+            default:
+                nnAssert(false);
+                break;
+        }
+    }
+
+    // Adjust the runtime info for the arguments passed to the model,
+    // modifying the buffer location, and possibly the dimensions.
+    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
+                                  const hidl_vec<RequestArgument>& arguments) {
+        nnAssert(indexes.size() == arguments.size());
+        for (size_t i = 0; i < indexes.size(); i++) {
+            const uint32_t operandIndex = indexes[i];
+            const RequestArgument& from = arguments[i];
+            RunTimeOperandInfo& to = mOperands[operandIndex];
+            if (from.dimensions.size() > 0) {
+                // It's the responsibility of the caller to validate that
+                // from.dimensions only modifies the dimensions that were
+                // unspecified in the model.  That's the case in SampleDriver.cpp
+                // with the call to validateRequest().
+                // TODO make sure that's the case for the default CPU path.
+                to.dimensions = from.dimensions;
+            }
+            if (from.hasNoValue) {
+                to.lifetime = OperandLifeTime::NO_VALUE;
+                nnAssert(to.buffer == nullptr);
+            } else {
+                auto poolIndex = from.location.poolIndex;
+                nnAssert(poolIndex < requestPoolInfos.size());
+                auto& r = requestPoolInfos[poolIndex];
+                to.buffer = r.buffer + from.location.offset;
+            }
+        }
+    };
+    updateForArguments(mModel->inputIndexes, mRequest->inputs);
+    updateForArguments(mModel->outputIndexes, mRequest->outputs);
+
+    return true;
+}
+
+void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
+    for (uint32_t i : inputs) {
+        auto& info = mOperands[i];
+        // Check if it's a static or model input/output.
+        if (info.numberOfUsesLeft == 0) {
+            continue;
+        }
+        info.numberOfUsesLeft--;
+        if (info.numberOfUsesLeft == 0) {
+            nnAssert(info.buffer != nullptr);
+            delete[] info.buffer;
+            info.buffer = nullptr;
+        }
+    }
+}
+
+
+int CpuExecutor::executeOperation(const Operation& operation) {
+    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
+    const hidl_vec<uint32_t>& ins = operation.inputs;
+    const hidl_vec<uint32_t>& outs = operation.outputs;
+    bool success = false;
+
+    // Function to verify that the number of input and output parameters
+    // matches what is expected.  Also checks that all the parameters have
+    // values. This function is to be used only for operations that do not
+    // accept optional arguments.
+    // TODO Have a version that works for optional arguments.
+    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
+                                                                size_t requiredOuts) -> bool {
+        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
+                          const char* type) -> bool {
+            size_t actualCount = indexes.size();
+            if (actualCount != requiredCount) {
+                LOG(ERROR) << getOperationName(operation.type)
+                           << ": Invalid number of " << type << " operands. Got " << actualCount
+                           << " of " << requiredCount;
+                return false;
+            }
+            for (size_t i = 0; i < actualCount; i++) {
+                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
+                    LOG(ERROR) << getOperationName(operation.type) << " " << type
+                               << " operand " << i << " is required but missing.";
+                    return false;
+                }
+            }
+            return true;
+        };
+        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
+    };
+
+    switch (operation.type) {
+        case OperationType::OEM_OPERATION: {
+            LOG(ERROR) << "OEM operation not supported for CPU execution";
+            success = false;
+        } break;
+        case OperationType::ADD: {
+            if (!allParametersPresent(3, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
+            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
+            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+            RunTimeOperandInfo& out = mOperands[outs[0]];
+            Shape outShape = out.shape();
+
+            if (in1.type == OperandType::TENSOR_FLOAT32) {
+                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&out, outShape) &&
+                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
+                                     in1.shape(),
+                                     reinterpret_cast<const float*>(in2.buffer),
+                                     in2.shape(),
+                                     activation,
+                                     reinterpret_cast<float*>(out.buffer),
+                                     outShape);
+            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&out, outShape) &&
+                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
+                                    in1.shape(),
+                                    reinterpret_cast<const uint8_t*>(in2.buffer),
+                                    in2.shape(),
+                                    activation,
+                                    reinterpret_cast<uint8_t*>(out.buffer),
+                                    outShape);
+            }
+        } break;
+        case OperationType::MUL: {
+            if (!allParametersPresent(3, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
+            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
+            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+            RunTimeOperandInfo& out = mOperands[outs[0]];
+            Shape outShape = out.shape();
+
+            if (in1.type == OperandType::TENSOR_FLOAT32) {
+                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&out, outShape) &&
+                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
+                                     in1.shape(),
+                                     reinterpret_cast<const float*>(in2.buffer),
+                                     in2.shape(),
+                                     activation,
+                                     reinterpret_cast<float*>(out.buffer),
+                                     outShape);
+            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&out, outShape) &&
+                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
+                                    in1.shape(),
+                                    reinterpret_cast<const uint8_t*>(in2.buffer),
+                                    in2.shape(),
+                                    activation,
+                                    reinterpret_cast<uint8_t*>(out.buffer),
+                                    outShape);
+            }
+        } break;
+        case OperationType::FLOOR: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = floorPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
+                                       reinterpret_cast<float*>(output.buffer),
+                                       outShape);
+            }
+        } break;
+        case OperationType::DEQUANTIZE: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = dequantizePrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          dequantizeQuant8ToFloat32(
+                                  reinterpret_cast<const uint8_t*>(input.buffer),
+                                  reinterpret_cast<float*>(output.buffer),
+                                  input.shape());
+            }
+        } break;
+        case OperationType::DEPTHWISE_CONV_2D: {
+            const size_t inCount = ins.size();
+            if ((inCount != 11 && inCount != 8) ||
+                    !allParametersPresent(inCount, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input  = mOperands[ins[0]];
+            const RunTimeOperandInfo& filter = mOperands[ins[1]];
+            const RunTimeOperandInfo& bias   = mOperands[ins[2]];
+
+            int32_t padding_left, padding_right;
+            int32_t padding_top, padding_bottom;
+            int32_t stride_width, stride_height;
+            int32_t depth_multiplier;
+            int32_t activation;
+
+            if (inCount == 11) {
+                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
+                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
+                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
+            } else {
+                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
+                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[7]]);
+
+                Shape inputShape = input.shape();
+                Shape filterShape = filter.shape();
+                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_height = getSizeOfDimension(inputShape, 1);
+                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
+                int32_t filter_height = getSizeOfDimension(filterShape, 1);
+                calculateExplicitPadding(input_width, stride_width,
+                                         filter_width, padding_implicit,
+                                         &padding_left, &padding_right);
+                calculateExplicitPadding(input_height, stride_height,
+                                         filter_height, padding_implicit,
+                                         &padding_top, &padding_bottom);
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
+                                               padding_left, padding_right,
+                                               padding_top, padding_bottom,
+                                               stride_width, stride_height,
+                                               &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
+                                               input.shape(),
+                                               reinterpret_cast<const float*>(filter.buffer),
+                                               filter.shape(),
+                                               reinterpret_cast<const float*>(bias.buffer),
+                                               bias.shape(),
+                                               padding_left, padding_right,
+                                               padding_top, padding_bottom,
+                                               stride_width, stride_height,
+                                               depth_multiplier, activation,
+                                               reinterpret_cast<float*>(output.buffer),
+                                               outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
+                                               padding_left, padding_right,
+                                               padding_top, padding_bottom,
+                                               stride_width, stride_height,
+                                               &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                              input.shape(),
+                                              reinterpret_cast<const uint8_t*>(filter.buffer),
+                                              filter.shape(),
+                                              reinterpret_cast<const int32_t*>(bias.buffer),
+                                              bias.shape(),
+                                              padding_left, padding_right,
+                                              padding_top, padding_bottom,
+                                              stride_width, stride_height,
+                                              depth_multiplier, activation,
+                                              reinterpret_cast<uint8_t*>(output.buffer),
+                                              outShape);
+            }
+
+        } break;
+        case OperationType::CONV_2D: {
+            const size_t inCount = ins.size();
+            if ((inCount != 10 && inCount != 7) ||
+                    !allParametersPresent(inCount, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input  = mOperands[ins[0]];
+            const RunTimeOperandInfo& filter = mOperands[ins[1]];
+            const RunTimeOperandInfo& bias   = mOperands[ins[2]];
+
+            int32_t padding_left, padding_right;
+            int32_t padding_top, padding_bottom;
+            int32_t stride_width, stride_height;
+            int32_t activation;
+
+            if (inCount == 10) {
+                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
+                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
+                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+            } else {
+                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+
+                Shape inputShape = input.shape();
+                Shape filterShape = filter.shape();
+                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_height = getSizeOfDimension(inputShape, 1);
+                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
+                int32_t filter_height = getSizeOfDimension(filterShape, 1);
+                calculateExplicitPadding(input_width, stride_width,
+                                         filter_width, padding_implicit,
+                                         &padding_left, &padding_right);
+                calculateExplicitPadding(input_height, stride_height,
+                                         filter_height, padding_implicit,
+                                         &padding_top, &padding_bottom);
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
+                                      padding_left, padding_right,
+                                      padding_top, padding_bottom,
+                                      stride_width, stride_height,
+                                      &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
+                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
+                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
+                                      padding_left, padding_right,
+                                      padding_top, padding_bottom,
+                                      stride_width, stride_height, activation,
+                                      reinterpret_cast<float*>(output.buffer), outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
+                                      padding_left, padding_right,
+                                      padding_top, padding_bottom,
+                                      stride_width, stride_height,
+                                      &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                     input.shape(),
+                                     reinterpret_cast<const uint8_t*>(filter.buffer),
+                                     filter.shape(),
+                                     reinterpret_cast<const int32_t*>(bias.buffer),
+                                     bias.shape(),
+                                     padding_left, padding_right,
+                                     padding_top, padding_bottom,
+                                     stride_width, stride_height, activation,
+                                     reinterpret_cast<uint8_t*>(output.buffer),
+                                     outShape);
+            }
+        } break;
+        case OperationType::AVERAGE_POOL_2D: {
+            const size_t inCount = ins.size();
+            if ((inCount != 10 && inCount != 7) ||
+                    !allParametersPresent(inCount, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+
+            int32_t padding_left, padding_right;
+            int32_t padding_top, padding_bottom;
+            int32_t stride_width, stride_height;
+            int32_t filter_width, filter_height;
+            int32_t activation;
+
+            if (inCount == 10) {
+                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+            } else {
+                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+
+                Shape inputShape = input.shape();
+                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_height = getSizeOfDimension(inputShape, 1);
+                calculateExplicitPadding(input_width, stride_width,
+                                         filter_width, padding_implicit,
+                                         &padding_left, &padding_right);
+                calculateExplicitPadding(input_height, stride_height,
+                                         filter_height, padding_implicit,
+                                         &padding_top, &padding_bottom);
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericPoolingPrepare(input.shape(),
+                                                padding_left, padding_right,
+                                                padding_top, padding_bottom,
+                                                stride_width, stride_height,
+                                                filter_width, filter_height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
+                                             input.shape(),
+                                             padding_left, padding_right,
+                                             padding_top, padding_bottom,
+                                             stride_width, stride_height,
+                                             filter_width, filter_height, activation,
+                                             reinterpret_cast<float*>(output.buffer),
+                                             outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericPoolingPrepare(input.shape(),
+                                                padding_left, padding_right,
+                                                padding_top, padding_bottom,
+                                                stride_width, stride_height,
+                                                filter_width, filter_height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                            input.shape(),
+                                            padding_left, padding_right,
+                                            padding_top, padding_bottom,
+                                            stride_width, stride_height,
+                                            filter_width, filter_height, activation,
+                                            reinterpret_cast<uint8_t*>(output.buffer),
+                                            outShape);
+            }
+        } break;
+        case OperationType::L2_POOL_2D: {
+            const size_t inCount = ins.size();
+            if ((inCount != 10 && inCount != 7) ||
+                    !allParametersPresent(inCount, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+
+            int32_t padding_left, padding_right;
+            int32_t padding_top, padding_bottom;
+            int32_t stride_width, stride_height;
+            int32_t filter_width, filter_height;
+            int32_t activation;
+
+            if (inCount == 10) {
+                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+            } else {
+                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+
+                Shape inputShape = input.shape();
+                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_height = getSizeOfDimension(inputShape, 1);
+                calculateExplicitPadding(input_width, stride_width,
+                                         filter_width, padding_implicit,
+                                         &padding_left, &padding_right);
+                calculateExplicitPadding(input_height, stride_height,
+                                         filter_height, padding_implicit,
+                                         &padding_top, &padding_bottom);
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericPoolingPrepare(input.shape(),
+                                                padding_left, padding_right,
+                                                padding_top, padding_bottom,
+                                                stride_width, stride_height,
+                                                filter_width, filter_height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
+                                        input.shape(),
+                                        padding_left, padding_right,
+                                        padding_top, padding_bottom,
+                                        stride_width, stride_height,
+                                        filter_width, filter_height, activation,
+                                        reinterpret_cast<float*>(output.buffer),
+                                        outShape);
+            }
+        } break;
+        case OperationType::MAX_POOL_2D: {
+            const size_t inCount = ins.size();
+            if ((inCount != 10 && inCount != 7) ||
+                    !allParametersPresent(inCount, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+
+            int32_t padding_left, padding_right;
+            int32_t padding_top, padding_bottom;
+            int32_t stride_width, stride_height;
+            int32_t filter_width, filter_height;
+            int32_t activation;
+
+            if (inCount == 10) {
+                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+            } else {
+                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+
+                Shape inputShape = input.shape();
+                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_height = getSizeOfDimension(inputShape, 1);
+                calculateExplicitPadding(input_width, stride_width,
+                                         filter_width, padding_implicit,
+                                         &padding_left, &padding_right);
+                calculateExplicitPadding(input_height, stride_height,
+                                         filter_height, padding_implicit,
+                                         &padding_top, &padding_bottom);
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericPoolingPrepare(input.shape(),
+                                                padding_left, padding_right,
+                                                padding_top, padding_bottom,
+                                                stride_width, stride_height,
+                                                filter_width, filter_height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
+                                         input.shape(),
+                                         padding_left, padding_right,
+                                         padding_top, padding_bottom,
+                                         stride_width, stride_height,
+                                         filter_width, filter_height, activation,
+                                         reinterpret_cast<float*>(output.buffer),
+                                         outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericPoolingPrepare(input.shape(),
+                                                padding_left, padding_right,
+                                                padding_top, padding_bottom,
+                                                stride_width, stride_height,
+                                                filter_width, filter_height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                        input.shape(),
+                                        padding_left, padding_right,
+                                        padding_top, padding_bottom,
+                                        stride_width, stride_height,
+                                        filter_width, filter_height, activation,
+                                        reinterpret_cast<uint8_t*>(output.buffer),
+                                        outShape);
+            }
+
+        } break;
+        case OperationType::RELU: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          reluFloat32(reinterpret_cast<const float*>(input.buffer),
+                                      input.shape(),
+                                      reinterpret_cast<float*>(output.buffer),
+                                      outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                     input.shape(),
+                                     reinterpret_cast<uint8_t*>(output.buffer),
+                                     outShape);
+            }
+        } break;
+        case OperationType::RELU1: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          relu1Float32(reinterpret_cast<const float*>(input.buffer),
+                                       input.shape(),
+                                       reinterpret_cast<float*>(output.buffer),
+                                       outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                      input.shape(),
+                                      reinterpret_cast<uint8_t*>(output.buffer),
+                                      outShape);
+            }
+        } break;
+        case OperationType::RELU6: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          relu6Float32(reinterpret_cast<const float*>(input.buffer),
+                                       input.shape(),
+                                       reinterpret_cast<float*>(output.buffer),
+                                       outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                      input.shape(),
+                                      reinterpret_cast<uint8_t*>(output.buffer),
+                                      outShape);
+            }
+        } break;
+        case OperationType::TANH: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          tanhFloat32(reinterpret_cast<const float*>(input.buffer),
+                                      input.shape(),
+                                      reinterpret_cast<float*>(output.buffer),
+                                      outShape);
+            }
+        } break;
+        case OperationType::LOGISTIC: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          logisticFloat32(reinterpret_cast<const float*>(input.buffer),
+                                          input.shape(),
+                                          reinterpret_cast<float*>(output.buffer),
+                                          outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                         input.shape(),
+                                         reinterpret_cast<uint8_t*>(output.buffer),
+                                         outShape);
+            }
+        } break;
+        case OperationType::SOFTMAX: {
+            if (!allParametersPresent(2, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            RunTimeOperandInfo& input = mOperands[ins[0]];
+            float beta = getScalarData<float>(mOperands[ins[1]]);
+            if (beta <= 0.0f) {
+                LOG(ERROR) << "beta must be positive for softmax";
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
+                                         input.shape(),
+                                         beta,
+                                         reinterpret_cast<float*>(output.buffer),
+                                         output.shape());
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericActivationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                        input.shape(),
+                                        beta,
+                                        reinterpret_cast<uint8_t*>(output.buffer),
+                                        output.shape());
+            }
+        } break;
+        case OperationType::FULLY_CONNECTED: {
+            if (!allParametersPresent(4, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            RunTimeOperandInfo& input   = mOperands[ins[0]];
+            RunTimeOperandInfo& weights = mOperands[ins[1]];
+            RunTimeOperandInfo& bias    = mOperands[ins[2]];
+
+            int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
+                                                input.shape(),
+                                                reinterpret_cast<const float*>(weights.buffer),
+                                                weights.shape(),
+                                                reinterpret_cast<const float*>(bias.buffer),
+                                                bias.shape(),
+                                                activation,
+                                                reinterpret_cast<float*>(output.buffer),
+                                                outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                               input.shape(),
+                                               reinterpret_cast<const uint8_t*>(weights.buffer),
+                                               weights.shape(),
+                                               reinterpret_cast<const int32_t*>(bias.buffer),
+                                               bias.shape(),
+                                               activation,
+                                               reinterpret_cast<uint8_t*>(output.buffer),
+                                               outShape);
+            }
+        } break;
+        case OperationType::CONCATENATION: {
+            if (outs.size() != 1 || ins.size() < 2) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            int numInputTensors = ins.size() - 1;
+            int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
+            if (firstInput.type == OperandType::TENSOR_FLOAT32) {
+                std::vector<Shape> inputShapes(numInputTensors);
+                std::vector<const float*> inputDataPtrs(numInputTensors);
+
+                for (int i=0; i<numInputTensors; i++) {
+                    RunTimeOperandInfo& input = mOperands[ins[i]];
+                    inputShapes[i] = input.shape();
+                    inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
+                }
+                success = concatenationPrepare(inputShapes, axis, &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          concatenationFloat32(inputDataPtrs, inputShapes, axis,
+                                               reinterpret_cast<float*>(output.buffer), outShape);
+            } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                std::vector<Shape> inputShapes(numInputTensors);
+                std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
+
+                for (int i=0; i<numInputTensors; i++) {
+                    RunTimeOperandInfo& input = mOperands[ins[i]];
+                    inputShapes[i] = input.shape();
+                    inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
+                }
+                success = concatenationPrepare(inputShapes, axis, &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          concatenationQuant8(inputDataPtrs, inputShapes, axis,
+                                              reinterpret_cast<uint8_t*>(output.buffer),
+                                              outShape);
+            }
+        } break;
+        case OperationType::L2_NORMALIZATION: {
+            if (!allParametersPresent(1, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericNormalizationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          l2normFloat32(reinterpret_cast<const float*>(input.buffer),
+                                        input.shape(),
+                                        reinterpret_cast<float*>(output.buffer),
+                                        outShape);
+            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                success = genericNormalizationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+                                       input.shape(),
+                                       reinterpret_cast<uint8_t*>(output.buffer),
+                                       outShape);
+            }
+        } break;
+        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
+            if (!allParametersPresent(5, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
+            float bias = getScalarData<float>(mOperands[ins[2]]);
+            float alpha = getScalarData<float>(mOperands[ins[3]]);
+            float beta = getScalarData<float>(mOperands[ins[4]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = genericNormalizationPrepare(input.shape(), &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
+                                                   input.shape(),
+                                                   radius, bias, alpha, beta,
+                                                   reinterpret_cast<float*>(output.buffer),
+                                                   outShape);
+            }
+        } break;
+        case OperationType::RESHAPE: {
+            if (!allParametersPresent(2, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            success = reshapePrepare(input.shape(),
+                                     reinterpret_cast<const int32_t*>(targetShape.buffer),
+                                     getNumberOfElements(targetShape.shape()),
+                                     &outShape) &&
+                      setInfoAndAllocateIfNeeded(&output, outShape) &&
+                      reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
+                                     input.shape(),
+                                     reinterpret_cast<void*>(output.buffer),
+                                     outShape);
+        } break;
+        case OperationType::RESIZE_BILINEAR: {
+            if (!allParametersPresent(3, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
+            int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            if (input.type == OperandType::TENSOR_FLOAT32) {
+                success = resizeBilinearPrepare(input.shape(),
+                                                width, height,
+                                                &outShape) &&
+                          setInfoAndAllocateIfNeeded(&output, outShape) &&
+                          resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
+                                                input.shape(),
+                                                reinterpret_cast<float*>(output.buffer),
+                                                outShape);
+            }
+        } break;
+        case OperationType::DEPTH_TO_SPACE: {
+            if (!allParametersPresent(2, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            success = depthToSpacePrepare(input.shape(),
+                                          blockSize,
+                                          &outShape) &&
+                      setInfoAndAllocateIfNeeded(&output, outShape) &&
+                      depthToSpaceGeneric(input.buffer,
+                                          input.shape(),
+                                          blockSize,
+                                          output.buffer,
+                                          outShape);
+        } break;
+        case OperationType::SPACE_TO_DEPTH: {
+            if (!allParametersPresent(2, 1)) {
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
+            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
+
+            RunTimeOperandInfo& output = mOperands[outs[0]];
+            Shape outShape = output.shape();
+
+            success = spaceToDepthPrepare(input.shape(),
+                                          blockSize,
+                                          &outShape) &&
+                      setInfoAndAllocateIfNeeded(&output, outShape) &&
+                      spaceToDepthGeneric(input.buffer,
+                                          input.shape(),
+                                          blockSize,
+                                          output.buffer,
+                                          outShape);
+        } break;
+        case OperationType::EMBEDDING_LOOKUP: {
+            const RunTimeOperandInfo &values =
+                mOperands[ins[EmbeddingLookup::kValueTensor]];
+            const RunTimeOperandInfo &lookups =
+                mOperands[ins[EmbeddingLookup::kLookupTensor]];
+            RunTimeOperandInfo &output =
+                mOperands[outs[EmbeddingLookup::kOutputTensor]];
+
+            Shape outputShape;
+            EmbeddingLookup lookup(operation, mOperands);
+
+            success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                lookup.Eval();
+        } break;
+        case OperationType::HASHTABLE_LOOKUP: {
+            const RunTimeOperandInfo &lookups =
+                mOperands[ins[HashtableLookup::kLookupTensor]];
+            const RunTimeOperandInfo &keys =
+                mOperands[ins[HashtableLookup::kKeyTensor]];
+            const RunTimeOperandInfo &values =
+                mOperands[ins[HashtableLookup::kValueTensor]];
+
+            RunTimeOperandInfo &output =
+                mOperands[outs[HashtableLookup::kOutputTensor]];
+            RunTimeOperandInfo &hits =
+                mOperands[outs[HashtableLookup::kHitsTensor]];
+
+            Shape outputShape, hitShape;
+            HashtableLookup lookup(operation, mOperands);
+
+            success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
+                                             &outputShape, &hitShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                setInfoAndAllocateIfNeeded(&hits, hitShape) &&
+                lookup.Eval();
+        } break;
+        case OperationType::LSH_PROJECTION: {
+            RunTimeOperandInfo &output =
+                mOperands[outs[LSHProjection::kOutputTensor]];
+
+            Shape outputShape;
+            LSHProjection lsh(operation, mOperands);
+
+            success = LSHProjection::Prepare(operation, mOperands,
+                                             &outputShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                lsh.Eval();
+        } break;
+        case OperationType::LSTM: {
+            RunTimeOperandInfo &scratch =
+                mOperands[outs[LSTMCell::kScratchBufferTensor]];
+            RunTimeOperandInfo &outputStateOut =
+                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
+            RunTimeOperandInfo &cellStateOut =
+                mOperands[outs[LSTMCell::kCellStateOutTensor]];
+            RunTimeOperandInfo &output =
+                mOperands[outs[LSTMCell::kOutputTensor]];
+
+            Shape scratchShape, outputStateShape, cellStateShape, outputShape;
+            LSTMCell lstm_cell(operation, mOperands);
+
+            success = LSTMCell::Prepare(operation, mOperands,
+                                        &scratchShape, &outputStateShape,
+                                        &cellStateShape, &outputShape) &&
+                setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
+                setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
+                setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                lstm_cell.Eval();
+        } break;
+        case OperationType::RNN: {
+            RunTimeOperandInfo &hiddenStateOut =
+                mOperands[outs[RNN::kHiddenStateOutTensor]];
+            RunTimeOperandInfo &output =
+                mOperands[outs[RNN::kOutputTensor]];
+
+            Shape hiddenStateShape, outputShape;
+            RNN rnn_cell(operation, mOperands);
+
+            success = RNN::Prepare(operation, mOperands,
+                                   &hiddenStateShape, &outputShape) &&
+                setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                rnn_cell.Eval();
+        } break;
+        case OperationType::SVDF: {
+            RunTimeOperandInfo &stateOut =
+                mOperands[outs[SVDF::kStateOutTensor]];
+            RunTimeOperandInfo &output =
+                mOperands[outs[SVDF::kOutputTensor]];
+
+            Shape stateShape, outputShape;
+            SVDF svdf(operation, mOperands);
+
+            success = SVDF::Prepare(operation, mOperands,
+                                    &stateShape, &outputShape) &&
+                setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
+                setInfoAndAllocateIfNeeded(&output, outputShape) &&
+                svdf.Eval();
+        } break;
+        default:
+            nnAssert(false);
+            break;
+    }
+    if (!success) {
+        LOG(ERROR) << getOperationName(operation.type) << " failed.";
+        return ANEURALNETWORKS_OP_FAILED;
+    }
+
+    freeNoLongerUsedOperands(ins);
+    return ANEURALNETWORKS_NO_ERROR;
+}
+#endif
+
+} // namespace nn
+} // namespace android
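
Every case in the commented-out executeOperation() above chains the same three steps through short-circuiting &&: an operation-specific *Prepare() call that validates the inputs and computes the output Shape, setInfoAndAllocateIfNeeded() to resize and allocate the output operand, and finally the CPU kernel itself. A minimal, self-contained sketch of that control flow is shown below; Operand, examplePrepare(), exampleAllocate(), and exampleKernel() are hypothetical stand-ins for illustration only, not the Android NN types or helpers.

// Sketch of the prepare -> allocate -> kernel chain used by every case of
// executeOperation(). All types and helpers here are simplified stand-ins.
#include <cstdint>
#include <vector>

struct Shape { std::vector<uint32_t> dimensions; };

struct Operand {
    Shape shape;
    std::vector<uint8_t> storage;  // simplified owned buffer
    const float* data() const { return reinterpret_cast<const float*>(storage.data()); }
    float* data() { return reinterpret_cast<float*>(storage.data()); }
};

// Stands in for the *Prepare() helpers: validate inputs, compute the output shape.
static bool examplePrepare(const Shape& in, Shape* out) {
    *out = in;  // identity op: output shape equals input shape
    return !in.dimensions.empty();
}

// Stands in for setInfoAndAllocateIfNeeded(): resize the output operand's buffer.
static bool exampleAllocate(Operand* out, const Shape& shape) {
    uint32_t elements = 1;
    for (uint32_t d : shape.dimensions) elements *= d;
    out->shape = shape;
    out->storage.resize(elements * sizeof(float));
    return true;
}

// Stands in for a CPU kernel such as l2normFloat32(): here, a plain copy.
static bool exampleKernel(const float* in, const Shape& inShape, float* out) {
    uint32_t elements = 1;
    for (uint32_t d : inShape.dimensions) elements *= d;
    for (uint32_t i = 0; i < elements; ++i) out[i] = in[i];
    return true;
}

// The same short-circuiting chain as in the switch: if any step fails,
// `success` is false and the caller reports ANEURALNETWORKS_OP_FAILED.
bool runOneOperation(const Operand& input, Operand& output) {
    Shape outShape = output.shape;
    bool success = examplePrepare(input.shape, &outShape) &&
                   exampleAllocate(&output, outShape) &&
                   exampleKernel(input.data(), input.shape, output.data());
    return success;
}

In the real code the chain additionally selects between float and quantized kernels based on the input OperandType, as the CONCATENATION and L2_NORMALIZATION cases above show.
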
diff --git a/src/runtime/ref/nn/common/include/CpuExecutor.h b/src/runtime/ref/nn/common/include/CpuExecutor.h
new file mode 100644 (file)
index 0000000..d63d67e
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
+#define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
+
+#if 0 // REF-ANN
+#include "HalInterfaces.h"
+#include "OperationsUtils.h"
+#include "Utils.h"
+#endif
+
+#include <algorithm>
+#include <vector>
+
+namespace android {
+namespace nn {
+
+// Information we maintain about each operand during execution that
+// may change during execution.
+struct RunTimeOperandInfo {
+
+#if 0 // REF-ANN
+    // TODO Storing the type here is redundant, as it won't change during execution.
+    OperandType type;
+    // The type and dimensions of the operand.  The dimensions can
+    // change at runtime.  We include the type because it's useful
+    // to pass together with the dimensions to the functions implementing
+    // the operators.
+    std::vector<uint32_t> dimensions;
+
+    float scale;
+    int32_t zeroPoint;
+    // Where the operand's data is stored.  Check the corresponding
+    // location information in the model to figure out if this points
+    // to memory we have allocated for a temporary operand.
+    uint8_t* buffer;
+    // The length of the buffer.
+    uint32_t length;
+    // Whether this is a temporary variable, a model input, a constant, etc.
+    OperandLifeTime lifetime;
+    // Keeps track of how many operations have yet to make use
+    // of this temporary variable.  When the count is decremented to 0,
+    // we free the buffer.  For non-temporary variables, this count is
+    // always 0.
+    uint32_t numberOfUsesLeft;
+
+    Shape shape() const {
+        return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
+    }
+#endif
+};
+
+// Used to keep a pointer to each of the memory pools.
+struct RunTimePoolInfo {
+#if 0 // REF-ANN
+    sp<IMemory> memory;
+    hidl_memory hidlMemory;
+    uint8_t* buffer;
+
+    bool set(const hidl_memory& hidlMemory);
+    bool update();
+#endif
+};
+
+// TODO-REF Original code from Android
+#if 0
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+                                         const hidl_vec<hidl_memory>& pools);
+#endif
+
+// This class is used to execute a model on the CPU.
+class CpuExecutor {
+#if 0 // REF-ANN
+public:
+    // Executes the model. The results will be stored at the locations
+    // specified in the constructor.
+    // The model must outlive the executor.  We prevent it from being modified
+    // while this is executing.
+    int run(const Model& model, const Request& request,
+            const std::vector<RunTimePoolInfo>& modelPoolInfos,
+            const std::vector<RunTimePoolInfo>& requestPoolInfos);
+
+private:
+    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
+    // Runs one operation of the graph.
+    int executeOperation(const Operation& entry);
+    // Decrement the usage count for the operands listed.  Frees the memory
+    // allocated for any temporary variable with a count of zero.
+    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);
+
+    // The model and the request that we'll execute. Only valid while run()
+    // is being executed.
+    const Model* mModel = nullptr;
+    const Request* mRequest = nullptr;
+
+    // We're copying the list of all the dimensions from the model, as
+    // these may be modified when we run the operations.  Since we're
+    // making a full copy, the indexes used in the operand description
+    // stay valid.
+    //    std::vector<uint32_t> mDimensions;
+    // Runtime information about all the operands.
+    std::vector<RunTimeOperandInfo> mOperands;
+#endif
+};
+
+namespace {
+
+template <typename T>
+T getScalarData(const RunTimeOperandInfo& info) {
+#if 0 // REF-ANN
+  // TODO: Check buffer is at least as long as size of data.
+  T* data = reinterpret_cast<T*>(info.buffer);
+  return data[0];
+#else
+  return (T)0;
+#endif
+}
+
+inline bool IsNullInput(const RunTimeOperandInfo *input) {
+#if 0 // REF-ANN
+    return input->lifetime == OperandLifeTime::NO_VALUE;
+#else
+    return true;
+#endif
+}
+
+#if 0 // REF-ANN
+inline int NumInputsWithValues(const Operation &operation,
+                               std::vector<RunTimeOperandInfo> &operands) {
+  const std::vector<uint32_t> &inputs = operation.inputs;
+  return std::count_if(inputs.begin(), inputs.end(),
+                       [&operands](uint32_t i) {
+                         return !IsNullInput(&operands[i]);
+                       });
+}
+
+inline int NumOutputs(const Operation &operation) {
+  return operation.outputs.size();
+}
+
+inline size_t NumDimensions(const RunTimeOperandInfo *operand) {
+  return operand->shape().dimensions.size();
+}
+
+inline uint32_t SizeOfDimension(const RunTimeOperandInfo *operand, int i) {
+  return operand->shape().dimensions[i];
+}
+
+inline RunTimeOperandInfo *GetInput(const Operation &operation,
+                                    std::vector<RunTimeOperandInfo> &operands,
+                                    int index) {
+  return &operands[operation.inputs[index]];
+}
+
+inline RunTimeOperandInfo *GetOutput(const Operation &operation,
+                                     std::vector<RunTimeOperandInfo> &operands,
+                                     int index) {
+  return &operands[operation.outputs[index]];
+}
+#endif
+
+}  // anonymous namespace
+
+} // namespace nn
+} // namespace android
+
+#endif // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
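
Because the entire implementation is still fenced behind #if 0 // REF-ANN, the helpers that remain visible fall back to placeholder results: getScalarData() returns a zero-initialized value and IsNullInput() always reports true. A quick sanity check of the skeleton as committed, assuming the header is reachable on the include path as "CpuExecutor.h", could look like this:

// Sketch: exercises only what this skeleton header currently defines.
#include <cassert>
#include <cstdint>

#include "CpuExecutor.h"

int main() {
    android::nn::RunTimeOperandInfo info;  // currently an empty struct

    // The #else fallbacks above yield a placeholder zero and "no value".
    assert(android::nn::getScalarData<int32_t>(info) == 0);
    assert(android::nn::IsNullInput(&info));

    android::nn::CpuExecutor executor;     // constructible, but exposes no API yet
    (void)executor;
    return 0;
}

Nothing needs to be linked for this check: the visible helpers are defined inline in the header, so it stays header-only until the implementation in CpuExecutor.cpp is re-enabled.
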