From: Hyung-Kyu Choi Date: Thu, 29 Mar 2018 05:12:15 +0000 (+0900) Subject: Introduce OperationsUtils X-Git-Tag: 0.1~522 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a889e69c2dff771668e69efd302871ef0f1388b6;p=platform%2Fcore%2Fml%2Fnnfw.git Introduce OperationsUtils - Introduce OperationsUtils - Make use of types introduced from OperationUtils in CpuExecutor Signed-off-by: Hyung-Kyu Choi --- diff --git a/src/runtime/ref/nn/common/CMakeLists.txt b/src/runtime/ref/nn/common/CMakeLists.txt index 3c6f81b..4223f7f 100644 --- a/src/runtime/ref/nn/common/CMakeLists.txt +++ b/src/runtime/ref/nn/common/CMakeLists.txt @@ -10,6 +10,7 @@ SET (INC_DIRS SET (CUR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/CpuExecutor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/OperationsUtils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Utils.cpp ) SET (SRCS diff --git a/src/runtime/ref/nn/common/CpuExecutor.cpp b/src/runtime/ref/nn/common/CpuExecutor.cpp index bf74650..18e4d57 100644 --- a/src/runtime/ref/nn/common/CpuExecutor.cpp +++ b/src/runtime/ref/nn/common/CpuExecutor.cpp @@ -19,9 +19,7 @@ #include "CpuExecutor.h" #include "NeuralNetworks.h" -#if 0 // REF-ANN #include "Operations.h" -#endif #include @@ -98,6 +96,7 @@ bool setRunTimePoolInfosFromHidlMemories(std::vector* poolInfos } return true; } +#endif // Updates the RunTimeOperandInfo with the newly calculated shape. // Allocate the buffer if we need to. @@ -129,7 +128,6 @@ static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& sh } return true; } -#endif // Ignore the .pools entry in model and request. This will have been taken care of // by the caller. @@ -310,9 +308,9 @@ int CpuExecutor::executeOperation(const Operation& operation) { int32_t activation = getScalarData(mOperands[ins[2]]); RunTimeOperandInfo& out = mOperands[outs[0]]; -#if 0 // REF-ANN Shape outShape = out.shape(); +#if 0 // REF-ANN if (in1.type == OperandType::TENSOR_FLOAT32) { success = addMulPrepare(in1.shape(), in2.shape(), &outShape) && setInfoAndAllocateIfNeeded(&out, outShape) && diff --git a/src/runtime/ref/nn/common/OperationsUtils.cpp b/src/runtime/ref/nn/common/OperationsUtils.cpp new file mode 100644 index 0000000..9c3df01 --- /dev/null +++ b/src/runtime/ref/nn/common/OperationsUtils.cpp @@ -0,0 +1,551 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "OperationsUtils" + +#include "OperationsUtils.h" +#include "Operations.h" +#include "Utils.h" + +#include + +// TODO-NNRT: There was no included in Android NN code. 
Remove this later if unnecessary +#include + +namespace android { +namespace nn { + +bool SameShape(const Shape& in1, const Shape& in2) { + if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) { + return false; + } + for (size_t i = 0; i < in1.dimensions.size(); i++) { + if (in1.dimensions[i] != in2.dimensions[i]) { + return false; + } + } + return true; +} + +bool SetShape(const Shape& in, Shape* out) { + if (in.type != out->type || in.dimensions.size() != out->dimensions.size()) { + return false; + } + out->dimensions = in.dimensions; + return true; +} + +uint32_t getNumberOfElements(const Shape& shape) { + uint32_t count = 1; + for (size_t i = 0; i < shape.dimensions.size(); i++) { + count *= shape.dimensions[i]; + } + return count; +} + +uint32_t getNumberOfDimensions(const Shape& shape) { + return shape.dimensions.size(); +} + +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) { + if (dimensionIdx >= shape.dimensions.size()) { + // TODO, log the error + return 0; + } + return shape.dimensions[dimensionIdx]; +} + +bool QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int32_t* right_shift) { + NN_OPS_CHECK(double_multiplier >= 0.); + NN_OPS_CHECK(double_multiplier < 1.); + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *right_shift = 0; + return true; + } + NN_OPS_CHECK(double_multiplier > 0.); + const double q = std::frexp(double_multiplier, right_shift); + *right_shift *= -1; + int64_t q_fixed = static_cast(std::round(q * (1ll << 31))); + NN_OPS_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + --*right_shift; + } + NN_OPS_CHECK(*right_shift >= 0); + NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); + return true; +} + +bool QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { + NN_OPS_CHECK(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + int64_t q_fixed = static_cast(std::round(q * (1ll << 31))); + NN_OPS_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + ++*left_shift; + } + NN_OPS_CHECK(*left_shift >= 0); + NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); + return true; +} + +bool GetQuantizedConvolutionMultipler(const Shape& inputShape, + const Shape& filterShape, + const Shape& biasShape, + const Shape& outputShape, + float* multiplier) { + const float input_product_scale = inputShape.scale * filterShape.scale; + const float bias_scale = biasShape.scale; + const float output_scale = outputShape.scale; + + // The following conditions must be guaranteed by the training pipeline. 
+ NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + NN_OPS_CHECK(input_product_scale >= 0); + NN_OPS_CHECK(input_product_scale < output_scale); + *multiplier = input_product_scale / output_scale; + return true; +} + +void CalculateActivationRangeUint8(int32_t activation, + const Shape& outputShape, + int32_t* act_min, + int32_t* act_max) { + const int32_t qmin = std::numeric_limits::min(); + const int32_t qmax = std::numeric_limits::max(); + + const auto scale = outputShape.scale; + const auto zero_point = outputShape.offset; + + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast(std::round(f / scale)); + }; + +// TODO-NNRT Enable below code when common/include/ActivationFunctor.h available +#if 0 // REF-ANN + if (activation == kActivationRelu) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } else if (activation == kActivationRelu6) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } else if (activation == kActivationRelu1) { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } else { + *act_min = qmin; + *act_max = qmax; + } +#endif +} + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) { + const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * + (1ll << (31 - input_integer_bits)) / + (1ll << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. + return static_cast(std::floor(max_input_rescaled)); +} + +bool addMulPrepare(const Shape& in1, const Shape& in2, Shape* out) { + NN_OPS_CHECK(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4); + NN_OPS_CHECK(in1.type == in2.type); + if (SameShape(in1, in2)) { + return SetShape(in1, out); + } else { + // BroadcastAdd needed + uint32_t numberOfDims1 = getNumberOfDimensions(in1); + uint32_t numberOfDims2 = getNumberOfDimensions(in2); + uint32_t maxDims = std::max(numberOfDims1, numberOfDims2); + out->dimensions = std::vector(maxDims); + for (uint32_t i = 1; i <= maxDims; i++) { + uint32_t dim1 = 1; + if (i <= numberOfDims1) { + dim1 = getSizeOfDimension(in1, numberOfDims1 - i); + } + uint32_t dim2 = 1; + if (i <= numberOfDims2) { + dim2 = getSizeOfDimension(in2, numberOfDims2 - i); + } + if (dim1 != dim2 && dim1 != 1 && dim2 != 1) { + LOG(ERROR) << "Dimensions mismatch for BroadcastAdd"; + return false; + } + out->dimensions[maxDims - i] = std::max(dim1, dim2); + } + } + return true; +} + +bool floorPrepare(const Shape& input, Shape* output) { + return SetShape(input, output); +} + +bool dequantizePrepare(const Shape& input, Shape* output) { + if (input.type != OperandType::TENSOR_QUANT8_ASYMM || + output->type != OperandType::TENSOR_FLOAT32) { + LOG(ERROR) << "bad input / output operand type."; + return false; + } + if (input.dimensions.size() != output->dimensions.size()) { + LOG(ERROR) << "input and output tensors don't have the same rank."; + return false; + } + output->dimensions = input.dimensions; + return true; +} + +bool convPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output) { + NN_OPS_CHECK(input.type == filter.type); + if (input.type == 
OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(getNumberOfDimensions(filter) == 4); + NN_OPS_CHECK(getNumberOfDimensions(bias) == 1); + + NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0)); + NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(input, 3)); + + uint32_t channels_out = getSizeOfDimension(filter, 0); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t filterWidth = getSizeOfDimension(filter, 2); + uint32_t filterHeight = getSizeOfDimension(filter, 1); + uint32_t batches = getSizeOfDimension(input, 0); + + uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filterHeight, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + +bool depthwiseConvPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output) { + NN_OPS_CHECK(input.type == filter.type); + if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(getNumberOfDimensions(filter) == 4); + NN_OPS_CHECK(getNumberOfDimensions(bias) == 1); + + NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(bias, 0)); + + uint32_t channels_out = getSizeOfDimension(filter, 3); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t filterWidth = getSizeOfDimension(filter, 2); + uint32_t filterHeight = getSizeOfDimension(filter, 1); + uint32_t batches = getSizeOfDimension(input, 0); + + uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filterHeight, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + + +bool genericPoolingPrepare(const Shape& input, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t channels_out = getSizeOfDimension(input, 3); + + uint32_t outWidth = computeOutSize(width, filter_width, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filter_height, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + + +bool genericActivationPrepare(const Shape& input, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) <= 4); + return SetShape(input, output); +} + +bool fullyConnectedPrepare(const Shape& input, + const Shape& weights, + const 
Shape& bias, + Shape* output) { + // Check all the parameters of tensor match within themselves and match the + // input configuration. + NN_OPS_CHECK(input.type == weights.type); + if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) >= 2); + uint32_t input_size = getNumberOfElements(input); + uint32_t num_units = getSizeOfDimension(weights, 0); + uint32_t batch_size = input_size / getSizeOfDimension(weights, 1); + + NN_OPS_CHECK(getSizeOfDimension(bias, 0) == num_units); + NN_OPS_CHECK(getSizeOfDimension(weights, 1) * batch_size == input_size); + NN_OPS_CHECK(getNumberOfDimensions(weights) == 2); + + output->type = input.type; + output->dimensions = {batch_size, num_units}; + + return true; +} + +bool concatenationPrepare(const std::vector& inputShapes, + int32_t axis, + Shape* output) { + + int num_inputs = inputShapes.size(); + OperandType input_type = inputShapes[0].type; + uint32_t num_dimensions = getNumberOfDimensions(inputShapes[0]); + + NN_OPS_CHECK(axis >= 0); + NN_OPS_CHECK(axis < (int32_t)num_dimensions); + + int sum_axis = getSizeOfDimension(inputShapes[0], axis); + for (int i = 1; i < num_inputs; ++i) { + NN_OPS_CHECK(getNumberOfDimensions(inputShapes[i]) == num_dimensions); + NN_OPS_CHECK(inputShapes[i].type == inputShapes[0].type); + if (input_type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(inputShapes[0].offset == inputShapes[i].offset); + NN_OPS_CHECK(inputShapes[0].scale == inputShapes[i].scale); + } + for (int d = 0; d < (int32_t)num_dimensions; ++d) { + if (d == axis) { + sum_axis += getSizeOfDimension(inputShapes[i], axis); + } else { + NN_OPS_CHECK(getSizeOfDimension(inputShapes[0], d) == + getSizeOfDimension(inputShapes[i], d)); + } + } + } + + output->type = input_type; + output->dimensions = inputShapes[0].dimensions; + output->dimensions[axis] = sum_axis; + + if (input_type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(inputShapes[0].offset == output->offset); + NN_OPS_CHECK(inputShapes[0].scale == output->scale); + } + + return true; +} + + +bool genericNormalizationPrepare(const Shape& input, Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + return SetShape(input, output); +} + +bool reshapePrepare(const Shape& input, + const int32_t* targetDims, + const int32_t targetDimsSize, + Shape* output) { + // Reshape allows one of the targetDims components to have the + // special -1 value, meaning it will be calculated automatically based on the + // input. Here we calculate what that dimension should be so that the number + // of output elements in the same as the number of input elements. 
+ int32_t numInputElements = (int32_t) getNumberOfElements(input); + + std::vector outDims(targetDimsSize); + int32_t numOutputElements = 1; + int32_t strechDim = -1; + for (int32_t i = 0; i < targetDimsSize; ++i) { + int32_t value = targetDims[i]; + if (value == -1) { + NN_OPS_CHECK(strechDim == -1); + strechDim = i; + } else { + numOutputElements *= value; + outDims[i] = (uint32_t)value; + } + } + if (strechDim != -1) { + int32_t strechValue = numInputElements / numOutputElements; + outDims[strechDim] = (uint32_t) strechValue; + numOutputElements *= strechValue; + } + + NN_OPS_CHECK(numInputElements == numOutputElements); + + output->type = input.type; + output->dimensions = outDims; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool resizeBilinearPrepare(const Shape& input, + int32_t width, + int32_t height, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t channels = getSizeOfDimension(input, 3); + + output->type = input.type; + output->dimensions = {batches, (uint32_t)height, (uint32_t)width, channels}; + + return true; +} + +bool depthToSpacePrepare(const Shape& input, + int32_t blockSize, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(blockSize > 0); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t channels = getSizeOfDimension(input, 3); + + NN_OPS_CHECK(channels % (blockSize * blockSize) == 0); + output->type = input.type; + output->dimensions = {batches, + height * blockSize, + width * blockSize, + channels / (blockSize * blockSize)}; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool spaceToDepthPrepare(const Shape& input, + int32_t blockSize, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(blockSize > 0); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t channels = getSizeOfDimension(input, 3); + + NN_OPS_CHECK(height % blockSize == 0); + NN_OPS_CHECK(width % blockSize == 0); + + output->type = input.type; + output->dimensions = {batches, + height / blockSize, + width / blockSize, + channels * (blockSize * blockSize)}; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool embeddingLookupPrepare(const Shape &valueShape, + const Shape &lookupShape, + Shape *outputShape) { + NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2); + NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); + + const uint32_t rows = getSizeOfDimension(valueShape, 0); + const uint32_t columns = getSizeOfDimension(valueShape, 1); + + const uint32_t lookups = getSizeOfDimension(lookupShape, 0); + + outputShape->type = valueShape.type; + outputShape->dimensions = { lookups, columns }; + for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) { + outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); + } + outputShape->offset = valueShape.offset; + outputShape->scale = valueShape.scale; + + return true; +} + +bool hashtableLookupPrepare(const Shape &lookupShape, + const Shape &keyShape, + const Shape &valueShape, + Shape *outputShape, + Shape *hitShape) { + NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); + NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1); + 
NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1); + + const uint32_t lookups = getSizeOfDimension(lookupShape, 0); + const uint32_t keys = getSizeOfDimension(keyShape, 0); + const uint32_t rows = getSizeOfDimension(valueShape, 0); + outputShape->type = valueShape.type; + outputShape->dimensions = { lookups }; + for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) { + outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); + } + outputShape->offset = valueShape.offset; + outputShape->scale = valueShape.scale; + + hitShape->type = OperandType::TENSOR_QUANT8_ASYMM; + hitShape->dimensions = { lookups }; + hitShape->offset = 0; + hitShape->scale = 1.f; + + return true; +} + +} // namespace nn +} // namespace android diff --git a/src/runtime/ref/nn/common/include/CpuExecutor.h b/src/runtime/ref/nn/common/include/CpuExecutor.h index 8f961ea..e0a98b7 100644 --- a/src/runtime/ref/nn/common/include/CpuExecutor.h +++ b/src/runtime/ref/nn/common/include/CpuExecutor.h @@ -18,9 +18,7 @@ #define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H #include "HalInterfaces.h" -#if 0 // REF-ANN #include "OperationsUtils.h" -#endif #include "Utils.h" #include @@ -57,11 +55,9 @@ struct RunTimeOperandInfo { // always 0. uint32_t numberOfUsesLeft; -#if 0 // REF-ANN Shape shape() const { return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint}; } -#endif }; // Used to keep a pointer to each of the memory pools. diff --git a/src/runtime/ref/nn/common/include/Operations.h b/src/runtime/ref/nn/common/include/Operations.h new file mode 100644 index 0000000..006772f --- /dev/null +++ b/src/runtime/ref/nn/common/include/Operations.h @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_H +#define ANDROID_ML_NN_COMMON_OPERATIONS_H + +#if 0 // REF-ANN +#include "operations/EmbeddingLookup.h" +#include "operations/HashtableLookup.h" +#include "operations/LSHProjection.h" +#include "operations/LSTM.h" +#include "operations/RNN.h" +#include "operations/SVDF.h" +#endif + +#include + +#include +#include + +namespace android { +namespace nn { + +struct Shape; + +bool addFloat32(const float* in1, const Shape& shape1, + const float* in2, const Shape& shape2, + int32_t activation, + float* out, const Shape& shapeOut); +bool addQuant8(const uint8_t* in1, const Shape& shape1, + const uint8_t* in2, const Shape& shape2, + int32_t activation, + uint8_t* out, const Shape& shapeOut); + +bool mulFloat32(const float* in1, const Shape& shape1, + const float* in2, const Shape& shape2, + int32_t activation, + float* out, const Shape& shapeOut); +bool mulQuant8(const uint8_t* in1, const Shape& shape1, + const uint8_t* in2, const Shape& shape2, + int32_t activation, + uint8_t* out, const Shape& shapeOut); + +bool floorFloat32(const float* inputData, + float* outputData, + const Shape& shape); + +bool dequantizeQuant8ToFloat32(const uint8_t* inputData, + float* outputData, + const Shape& shape); + +bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + float* outputData, const Shape& outputShape); +bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* filterData, const Shape& filterShape, + const int32_t* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape); +bool convQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* filterData, const Shape& filterShape, + const int32_t* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool averagePoolFloat32(const float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool averagePoolQuant8(const uint8_t* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + uint8_t* outputData, const Shape& outputShape); +bool l2PoolFloat32(const 
float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool maxPoolFloat32(const float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool maxPoolQuant8(const uint8_t* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool reluFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool relu1Float32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool relu6Float32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool tanhFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool logisticFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool softmaxFloat32(const float* inputData, const Shape& inputShape, + const float beta, + float* outputData, const Shape& outputShape); +bool reluQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool relu1Quant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool relu6Quant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool softmaxQuant8(const uint8_t* inputData, const Shape& inputShape, + const float beta, + uint8_t* outputData, const Shape& outputShape); + +bool fullyConnectedFloat32(const float* inputData, const Shape& inputShape, + const float* weights, const Shape& weightsShape, + const float* biasData, const Shape& biasShape, + int32_t activation, + float* outputData, const Shape& outputShape); +bool fullyConnectedQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* weights, const Shape& weightsShape, + const int32_t* biasData, const Shape& biasShape, + int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool concatenationFloat32(const std::vector& inputDataPtrs, + const std::vector& inputShapes, int32_t axis, + float* outputData, const Shape& outputShape); +bool concatenationQuant8(const std::vector& inputDataPtrs, + const std::vector& inputShapes, int32_t axis, + uint8_t* outputData, const Shape& outputShape); + +bool l2normFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool l2normQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool localResponseNormFloat32(const float* inputData, const Shape& inputShape, + int32_t radius, float bias, float alpha, float beta, + float* outputData, const Shape& outputShape); + +bool 
reshapeGeneric(const void* inputData, const Shape& inputShape, + void* outputData, const Shape& outputShape); + +bool resizeBilinearFloat32(const float* inputData, + const Shape& inputShape, + float* outputData, + const Shape& outputShape); + +bool depthToSpaceGeneric(const uint8_t* inputData, const Shape& inputShape, + int32_t blockSize, + uint8_t* outputData, const Shape& outputShape); + +bool spaceToDepthGeneric(const uint8_t* inputData, const Shape& inputShape, + int32_t blockSize, + uint8_t* outputData, const Shape& outputShape); + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_H diff --git a/src/runtime/ref/nn/common/include/OperationsUtils.h b/src/runtime/ref/nn/common/include/OperationsUtils.h new file mode 100644 index 0000000..aaca0c0 --- /dev/null +++ b/src/runtime/ref/nn/common/include/OperationsUtils.h @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H +#define ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H + +#include "Utils.h" + +#include +#include + +// Macro to check if the input parameters for operation are valid or not. +#define NN_CHECK(v) \ + do { \ + if (!(v)) { \ + LOG(ERROR) << "NN_CHECK failed: " << #v << "'\n"; \ + return false; \ + } \ + } while(0); + +#define NN_CHECK_EQ(actual, expected) \ + NN_CHECK((actual) == (expected)) + +#define NN_OPS_CHECK NN_CHECK + +namespace android { +namespace nn { + +enum PaddingScheme { + kPaddingUnknown = 0, + kPaddingSame = 1, + kPaddingValid = 2, +}; + +// The type and dimensions of an operand. +struct Shape { + OperandType type; + std::vector dimensions; + float scale; + int32_t offset; +}; + +// Verifies that the two shapes are the same. +bool SameShape(const Shape& in1, const Shape& in2); + +// Sets out to the same shape as in. +bool SetShape(const Shape& in, Shape* out); + +// Return the total number of elements, i.e. all the dimensions multiplied +// together. For a scalar, returns one. 
+uint32_t getNumberOfElements(const Shape& shape); + +uint32_t getNumberOfDimensions(const Shape& shape); + +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx); + +inline uint32_t computeOutSize(uint32_t imageSize, uint32_t filterSize, uint32_t stride, + uint32_t paddingHead, uint32_t paddingTail) { + return (imageSize - filterSize + stride + paddingHead + paddingTail) / stride; +} + +__wur +bool QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int32_t* right_shift); + +__wur +bool QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); + +__wur +bool GetQuantizedConvolutionMultipler(const Shape& inputShape, + const Shape& filterShape, + const Shape& biasShape, + const Shape& outputShape, + float* multiplier); + +void CalculateActivationRangeUint8(int32_t activation, + const Shape& outputShape, + int32_t* act_min, + int32_t* act_max); + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift); + +inline void calculateExplicitPadding(int32_t in_size, int32_t stride, + int32_t filter_size, int32_t padding_implicit, + int32_t* padding_head, int32_t* padding_tail) { + *padding_head = 0; + *padding_tail = 0; + + if (padding_implicit == kPaddingSame) { + int32_t out_size = (in_size + stride - 1) / stride; + int32_t tmp = (out_size - 1) * stride + filter_size; + if (tmp > in_size) { + *padding_head = (tmp - in_size) / 2; + *padding_tail = (tmp - in_size) - *padding_head; + } + } +} + +inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight, + int32_t strideWidth, int32_t strideHeight, + int32_t filterWidth, int32_t filterHeight, + int32_t paddingLeft, int32_t paddingRight, + int32_t paddingTop, int32_t paddingBottom) { + if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) { + return kPaddingValid; + } + + int32_t expectedPaddingLeft, expectedPaddingRight; + int32_t expectedPaddingTop, expectedPaddingBottom; + + calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame, + &expectedPaddingLeft, &expectedPaddingRight); + calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame, + &expectedPaddingTop, &expectedPaddingBottom); + if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight && + expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) { + return kPaddingSame; + } else { + return kPaddingUnknown; + } +} + +// Preparation functions for the corresponding ops +bool addMulPrepare(const Shape& in1, const Shape& in2, Shape* out1); + +bool floorPrepare(const Shape& input, Shape* output); + +bool dequantizePrepare(const Shape& input, Shape* output); + +bool depthwiseConvPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output); + +bool convPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output); + +bool genericPoolingPrepare(const Shape& input, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + Shape* output); + +bool genericActivationPrepare(const Shape& input, 
Shape* output); + +bool fullyConnectedPrepare(const Shape& input, + const Shape& weights, + const Shape& bias, + Shape* output); + +bool concatenationPrepare(const std::vector& inputShapes, + int32_t axis, + Shape* output); + +bool genericNormalizationPrepare(const Shape& input, Shape* output); + +bool reshapePrepare(const Shape& input, + const int32_t* targetDims, + const int32_t targetDimsSize, + Shape* output); + +bool resizeBilinearPrepare(const Shape& input, + int32_t height, + int32_t width, + Shape* output); + +bool depthToSpacePrepare(const Shape& input, + int32_t blockSize, + Shape* output); + +bool spaceToDepthPrepare(const Shape& input, + int32_t blockSize, + Shape* output); + +bool embeddingLookupPrepare(const Shape &valueShape, + const Shape &lookupShape, + Shape *outputShape); + +bool hashtableLookupPrepare(const Shape &lookupShape, + const Shape &keyShape, + const Shape &valueShape, + Shape *outputShape, + Shape *hitShape); + +#define ANDROID_NN_MACRO_DISPATCH(macro) \ + switch (activation) { \ + case (int32_t) FusedActivationFunc::NONE: \ + macro(kNone); \ + break; \ + case (int32_t) FusedActivationFunc::RELU: \ + macro(kRelu); \ + break; \ + case (int32_t) FusedActivationFunc::RELU1: \ + macro(kRelu1); \ + break; \ + case (int32_t) FusedActivationFunc::RELU6: \ + macro(kRelu6); \ + break; \ + default: \ + LOG(ERROR) << "Unsupported fused activation function type"; \ + return false; \ + } + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
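
Usage sketch (not part of the patch): the CpuExecutor hunk above re-enables `out.shape()` so that it can eventually feed the new prepare helpers, e.g. addMulPrepare(). The snippet below shows how that broadcast shape preparation behaves on its own; it assumes the headers added by this commit are on the include path, that OperandType and the LOG macro are visible through Utils.h (as the Shape struct implies), and that OperationsUtils.cpp is linked in. Note that the broadcast branch only fills out->dimensions, so the caller keeps its own output type.

// sketch_addmulprepare.cpp -- illustrative only, not part of this commit
#include "OperationsUtils.h"

#include <cstdint>
#include <cstdio>

using android::nn::OperandType;
using android::nn::Shape;
using android::nn::addMulPrepare;

int main() {
    Shape a;
    a.type = OperandType::TENSOR_FLOAT32;
    a.dimensions = {2, 1, 4};

    Shape b;
    b.type = OperandType::TENSOR_FLOAT32;
    b.dimensions = {1, 3, 4};

    Shape out;
    out.type = OperandType::TENSOR_FLOAT32;  // addMulPrepare leaves type to the caller

    // Shapes differ, so addMulPrepare takes its broadcast branch and emits the
    // element-wise maximum of the trailing dimensions: {2,1,4} + {1,3,4} -> {2,3,4}.
    if (!addMulPrepare(a, b, &out)) {
        std::fprintf(stderr, "addMulPrepare failed\n");
        return 1;
    }
    for (uint32_t d : out.dimensions) {
        std::printf("%u ", d);  // expected: 2 3 4
    }
    std::printf("\n");
    return 0;
}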
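
The implicit-padding helpers added to OperationsUtils.h are header-only inline functions, so they can be exercised without the rest of the runtime. A small sketch (geometry values invented for illustration) of how calculateExplicitPadding() and computeOutSize() combine when an operation specifies SAME padding:

// sketch_padding.cpp -- illustrative only, not part of this commit
#include "OperationsUtils.h"

#include <cstdint>
#include <cstdio>

int main() {
    const int32_t inWidth = 224, filterWidth = 3, strideWidth = 2;

    int32_t padLeft = 0, padRight = 0;
    // Derive the explicit head/tail padding that SAME padding implies
    // for this input size, filter size and stride.
    android::nn::calculateExplicitPadding(inWidth, strideWidth, filterWidth,
                                          android::nn::kPaddingSame,
                                          &padLeft, &padRight);

    // With that padding, the output extent is ceil(inWidth / strideWidth).
    uint32_t outWidth = android::nn::computeOutSize(inWidth, filterWidth, strideWidth,
                                                    padLeft, padRight);

    std::printf("pad=(%d,%d) outWidth=%u\n", padLeft, padRight, outWidth);
    // expected: pad=(0,1) outWidth=112
    return 0;
}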