From: Hyung-Kyu Choi Date: Thu, 29 Mar 2018 05:12:15 +0000 (+0900) Subject: Introduce OperationsUtils X-Git-Tag: 0.1~522 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a889e69c2dff771668e69efd302871ef0f1388b6;p=platform%2Fcore%2Fml%2Fnnfw.git Introduce OperationsUtils - Introduce OperationsUtils - Make use of types introduced from OperationUtils in CpuExecutor Signed-off-by: Hyung-Kyu Choi --- diff --git a/src/runtime/ref/nn/common/CMakeLists.txt b/src/runtime/ref/nn/common/CMakeLists.txt index 3c6f81b..4223f7f 100644 --- a/src/runtime/ref/nn/common/CMakeLists.txt +++ b/src/runtime/ref/nn/common/CMakeLists.txt @@ -10,6 +10,7 @@ SET (INC_DIRS SET (CUR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/CpuExecutor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/OperationsUtils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Utils.cpp ) SET (SRCS diff --git a/src/runtime/ref/nn/common/CpuExecutor.cpp b/src/runtime/ref/nn/common/CpuExecutor.cpp index bf74650..18e4d57 100644 --- a/src/runtime/ref/nn/common/CpuExecutor.cpp +++ b/src/runtime/ref/nn/common/CpuExecutor.cpp @@ -19,9 +19,7 @@ #include "CpuExecutor.h" #include "NeuralNetworks.h" -#if 0 // REF-ANN #include "Operations.h" -#endif #include @@ -98,6 +96,7 @@ bool setRunTimePoolInfosFromHidlMemories(std::vector* poolInfos } return true; } +#endif // Updates the RunTimeOperandInfo with the newly calculated shape. // Allocate the buffer if we need to. @@ -129,7 +128,6 @@ static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& sh } return true; } -#endif // Ignore the .pools entry in model and request. This will have been taken care of // by the caller. @@ -310,9 +308,9 @@ int CpuExecutor::executeOperation(const Operation& operation) { int32_t activation = getScalarData(mOperands[ins[2]]); RunTimeOperandInfo& out = mOperands[outs[0]]; -#if 0 // REF-ANN Shape outShape = out.shape(); +#if 0 // REF-ANN if (in1.type == OperandType::TENSOR_FLOAT32) { success = addMulPrepare(in1.shape(), in2.shape(), &outShape) && setInfoAndAllocateIfNeeded(&out, outShape) && diff --git a/src/runtime/ref/nn/common/OperationsUtils.cpp b/src/runtime/ref/nn/common/OperationsUtils.cpp new file mode 100644 index 0000000..9c3df01 --- /dev/null +++ b/src/runtime/ref/nn/common/OperationsUtils.cpp @@ -0,0 +1,551 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "OperationsUtils" + +#include "OperationsUtils.h" +#include "Operations.h" +#include "Utils.h" + +#include + +// TODO-NNRT: There was no included in Android NN code. 
Remove this later if unnecessary +#include + +namespace android { +namespace nn { + +bool SameShape(const Shape& in1, const Shape& in2) { + if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) { + return false; + } + for (size_t i = 0; i < in1.dimensions.size(); i++) { + if (in1.dimensions[i] != in2.dimensions[i]) { + return false; + } + } + return true; +} + +bool SetShape(const Shape& in, Shape* out) { + if (in.type != out->type || in.dimensions.size() != out->dimensions.size()) { + return false; + } + out->dimensions = in.dimensions; + return true; +} + +uint32_t getNumberOfElements(const Shape& shape) { + uint32_t count = 1; + for (size_t i = 0; i < shape.dimensions.size(); i++) { + count *= shape.dimensions[i]; + } + return count; +} + +uint32_t getNumberOfDimensions(const Shape& shape) { + return shape.dimensions.size(); +} + +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) { + if (dimensionIdx >= shape.dimensions.size()) { + // TODO, log the error + return 0; + } + return shape.dimensions[dimensionIdx]; +} + +bool QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int32_t* right_shift) { + NN_OPS_CHECK(double_multiplier >= 0.); + NN_OPS_CHECK(double_multiplier < 1.); + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *right_shift = 0; + return true; + } + NN_OPS_CHECK(double_multiplier > 0.); + const double q = std::frexp(double_multiplier, right_shift); + *right_shift *= -1; + int64_t q_fixed = static_cast(std::round(q * (1ll << 31))); + NN_OPS_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + --*right_shift; + } + NN_OPS_CHECK(*right_shift >= 0); + NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); + return true; +} + +bool QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { + NN_OPS_CHECK(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + int64_t q_fixed = static_cast(std::round(q * (1ll << 31))); + NN_OPS_CHECK(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + ++*left_shift; + } + NN_OPS_CHECK(*left_shift >= 0); + NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); + *quantized_multiplier = static_cast(q_fixed); + return true; +} + +bool GetQuantizedConvolutionMultipler(const Shape& inputShape, + const Shape& filterShape, + const Shape& biasShape, + const Shape& outputShape, + float* multiplier) { + const float input_product_scale = inputShape.scale * filterShape.scale; + const float bias_scale = biasShape.scale; + const float output_scale = outputShape.scale; + + // The following conditions must be guaranteed by the training pipeline. 
+ NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + NN_OPS_CHECK(input_product_scale >= 0); + NN_OPS_CHECK(input_product_scale < output_scale); + *multiplier = input_product_scale / output_scale; + return true; +} + +void CalculateActivationRangeUint8(int32_t activation, + const Shape& outputShape, + int32_t* act_min, + int32_t* act_max) { + const int32_t qmin = std::numeric_limits::min(); + const int32_t qmax = std::numeric_limits::max(); + + const auto scale = outputShape.scale; + const auto zero_point = outputShape.offset; + + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast(std::round(f / scale)); + }; + +// TODO-NNRT Enable below code when common/include/ActivationFunctor.h available +#if 0 // REF-ANN + if (activation == kActivationRelu) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } else if (activation == kActivationRelu6) { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } else if (activation == kActivationRelu1) { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } else { + *act_min = qmin; + *act_max = qmax; + } +#endif +} + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) { + const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * + (1ll << (31 - input_integer_bits)) / + (1ll << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. + return static_cast(std::floor(max_input_rescaled)); +} + +bool addMulPrepare(const Shape& in1, const Shape& in2, Shape* out) { + NN_OPS_CHECK(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4); + NN_OPS_CHECK(in1.type == in2.type); + if (SameShape(in1, in2)) { + return SetShape(in1, out); + } else { + // BroadcastAdd needed + uint32_t numberOfDims1 = getNumberOfDimensions(in1); + uint32_t numberOfDims2 = getNumberOfDimensions(in2); + uint32_t maxDims = std::max(numberOfDims1, numberOfDims2); + out->dimensions = std::vector(maxDims); + for (uint32_t i = 1; i <= maxDims; i++) { + uint32_t dim1 = 1; + if (i <= numberOfDims1) { + dim1 = getSizeOfDimension(in1, numberOfDims1 - i); + } + uint32_t dim2 = 1; + if (i <= numberOfDims2) { + dim2 = getSizeOfDimension(in2, numberOfDims2 - i); + } + if (dim1 != dim2 && dim1 != 1 && dim2 != 1) { + LOG(ERROR) << "Dimensions mismatch for BroadcastAdd"; + return false; + } + out->dimensions[maxDims - i] = std::max(dim1, dim2); + } + } + return true; +} + +bool floorPrepare(const Shape& input, Shape* output) { + return SetShape(input, output); +} + +bool dequantizePrepare(const Shape& input, Shape* output) { + if (input.type != OperandType::TENSOR_QUANT8_ASYMM || + output->type != OperandType::TENSOR_FLOAT32) { + LOG(ERROR) << "bad input / output operand type."; + return false; + } + if (input.dimensions.size() != output->dimensions.size()) { + LOG(ERROR) << "input and output tensors don't have the same rank."; + return false; + } + output->dimensions = input.dimensions; + return true; +} + +bool convPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output) { + NN_OPS_CHECK(input.type == filter.type); + if (input.type == 
OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(getNumberOfDimensions(filter) == 4); + NN_OPS_CHECK(getNumberOfDimensions(bias) == 1); + + NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0)); + NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(input, 3)); + + uint32_t channels_out = getSizeOfDimension(filter, 0); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t filterWidth = getSizeOfDimension(filter, 2); + uint32_t filterHeight = getSizeOfDimension(filter, 1); + uint32_t batches = getSizeOfDimension(input, 0); + + uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filterHeight, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + +bool depthwiseConvPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output) { + NN_OPS_CHECK(input.type == filter.type); + if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(getNumberOfDimensions(filter) == 4); + NN_OPS_CHECK(getNumberOfDimensions(bias) == 1); + + NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(bias, 0)); + + uint32_t channels_out = getSizeOfDimension(filter, 3); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t filterWidth = getSizeOfDimension(filter, 2); + uint32_t filterHeight = getSizeOfDimension(filter, 1); + uint32_t batches = getSizeOfDimension(input, 0); + + uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filterHeight, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + + +bool genericPoolingPrepare(const Shape& input, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t channels_out = getSizeOfDimension(input, 3); + + uint32_t outWidth = computeOutSize(width, filter_width, stride_width, + padding_left, padding_right); + uint32_t outHeight = computeOutSize(height, filter_height, stride_height, + padding_top, padding_bottom); + + output->type = input.type; + output->dimensions = {batches, outHeight, outWidth, channels_out}; + return true; +} + + +bool genericActivationPrepare(const Shape& input, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) <= 4); + return SetShape(input, output); +} + +bool fullyConnectedPrepare(const Shape& input, + const Shape& weights, + const 
Shape& bias, + Shape* output) { + // Check all the parameters of tensor match within themselves and match the + // input configuration. + NN_OPS_CHECK(input.type == weights.type); + if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); + } else { + NN_OPS_CHECK(input.type == bias.type); + } + NN_OPS_CHECK(getNumberOfDimensions(input) >= 2); + uint32_t input_size = getNumberOfElements(input); + uint32_t num_units = getSizeOfDimension(weights, 0); + uint32_t batch_size = input_size / getSizeOfDimension(weights, 1); + + NN_OPS_CHECK(getSizeOfDimension(bias, 0) == num_units); + NN_OPS_CHECK(getSizeOfDimension(weights, 1) * batch_size == input_size); + NN_OPS_CHECK(getNumberOfDimensions(weights) == 2); + + output->type = input.type; + output->dimensions = {batch_size, num_units}; + + return true; +} + +bool concatenationPrepare(const std::vector& inputShapes, + int32_t axis, + Shape* output) { + + int num_inputs = inputShapes.size(); + OperandType input_type = inputShapes[0].type; + uint32_t num_dimensions = getNumberOfDimensions(inputShapes[0]); + + NN_OPS_CHECK(axis >= 0); + NN_OPS_CHECK(axis < (int32_t)num_dimensions); + + int sum_axis = getSizeOfDimension(inputShapes[0], axis); + for (int i = 1; i < num_inputs; ++i) { + NN_OPS_CHECK(getNumberOfDimensions(inputShapes[i]) == num_dimensions); + NN_OPS_CHECK(inputShapes[i].type == inputShapes[0].type); + if (input_type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(inputShapes[0].offset == inputShapes[i].offset); + NN_OPS_CHECK(inputShapes[0].scale == inputShapes[i].scale); + } + for (int d = 0; d < (int32_t)num_dimensions; ++d) { + if (d == axis) { + sum_axis += getSizeOfDimension(inputShapes[i], axis); + } else { + NN_OPS_CHECK(getSizeOfDimension(inputShapes[0], d) == + getSizeOfDimension(inputShapes[i], d)); + } + } + } + + output->type = input_type; + output->dimensions = inputShapes[0].dimensions; + output->dimensions[axis] = sum_axis; + + if (input_type == OperandType::TENSOR_QUANT8_ASYMM) { + NN_OPS_CHECK(inputShapes[0].offset == output->offset); + NN_OPS_CHECK(inputShapes[0].scale == output->scale); + } + + return true; +} + + +bool genericNormalizationPrepare(const Shape& input, Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + return SetShape(input, output); +} + +bool reshapePrepare(const Shape& input, + const int32_t* targetDims, + const int32_t targetDimsSize, + Shape* output) { + // Reshape allows one of the targetDims components to have the + // special -1 value, meaning it will be calculated automatically based on the + // input. Here we calculate what that dimension should be so that the number + // of output elements in the same as the number of input elements. 
+ int32_t numInputElements = (int32_t) getNumberOfElements(input); + + std::vector outDims(targetDimsSize); + int32_t numOutputElements = 1; + int32_t strechDim = -1; + for (int32_t i = 0; i < targetDimsSize; ++i) { + int32_t value = targetDims[i]; + if (value == -1) { + NN_OPS_CHECK(strechDim == -1); + strechDim = i; + } else { + numOutputElements *= value; + outDims[i] = (uint32_t)value; + } + } + if (strechDim != -1) { + int32_t strechValue = numInputElements / numOutputElements; + outDims[strechDim] = (uint32_t) strechValue; + numOutputElements *= strechValue; + } + + NN_OPS_CHECK(numInputElements == numOutputElements); + + output->type = input.type; + output->dimensions = outDims; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool resizeBilinearPrepare(const Shape& input, + int32_t width, + int32_t height, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t channels = getSizeOfDimension(input, 3); + + output->type = input.type; + output->dimensions = {batches, (uint32_t)height, (uint32_t)width, channels}; + + return true; +} + +bool depthToSpacePrepare(const Shape& input, + int32_t blockSize, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(blockSize > 0); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t channels = getSizeOfDimension(input, 3); + + NN_OPS_CHECK(channels % (blockSize * blockSize) == 0); + output->type = input.type; + output->dimensions = {batches, + height * blockSize, + width * blockSize, + channels / (blockSize * blockSize)}; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool spaceToDepthPrepare(const Shape& input, + int32_t blockSize, + Shape* output) { + NN_OPS_CHECK(getNumberOfDimensions(input) == 4); + NN_OPS_CHECK(blockSize > 0); + + uint32_t batches = getSizeOfDimension(input, 0); + uint32_t height = getSizeOfDimension(input, 1); + uint32_t width = getSizeOfDimension(input, 2); + uint32_t channels = getSizeOfDimension(input, 3); + + NN_OPS_CHECK(height % blockSize == 0); + NN_OPS_CHECK(width % blockSize == 0); + + output->type = input.type; + output->dimensions = {batches, + height / blockSize, + width / blockSize, + channels * (blockSize * blockSize)}; + output->offset = input.offset; + output->scale = input.scale; + + return true; +} + +bool embeddingLookupPrepare(const Shape &valueShape, + const Shape &lookupShape, + Shape *outputShape) { + NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2); + NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); + + const uint32_t rows = getSizeOfDimension(valueShape, 0); + const uint32_t columns = getSizeOfDimension(valueShape, 1); + + const uint32_t lookups = getSizeOfDimension(lookupShape, 0); + + outputShape->type = valueShape.type; + outputShape->dimensions = { lookups, columns }; + for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) { + outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); + } + outputShape->offset = valueShape.offset; + outputShape->scale = valueShape.scale; + + return true; +} + +bool hashtableLookupPrepare(const Shape &lookupShape, + const Shape &keyShape, + const Shape &valueShape, + Shape *outputShape, + Shape *hitShape) { + NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); + NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1); + 
NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1); + + const uint32_t lookups = getSizeOfDimension(lookupShape, 0); + const uint32_t keys = getSizeOfDimension(keyShape, 0); + const uint32_t rows = getSizeOfDimension(valueShape, 0); + outputShape->type = valueShape.type; + outputShape->dimensions = { lookups }; + for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) { + outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); + } + outputShape->offset = valueShape.offset; + outputShape->scale = valueShape.scale; + + hitShape->type = OperandType::TENSOR_QUANT8_ASYMM; + hitShape->dimensions = { lookups }; + hitShape->offset = 0; + hitShape->scale = 1.f; + + return true; +} + +} // namespace nn +} // namespace android diff --git a/src/runtime/ref/nn/common/include/CpuExecutor.h b/src/runtime/ref/nn/common/include/CpuExecutor.h index 8f961ea..e0a98b7 100644 --- a/src/runtime/ref/nn/common/include/CpuExecutor.h +++ b/src/runtime/ref/nn/common/include/CpuExecutor.h @@ -18,9 +18,7 @@ #define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H #include "HalInterfaces.h" -#if 0 // REF-ANN #include "OperationsUtils.h" -#endif #include "Utils.h" #include @@ -57,11 +55,9 @@ struct RunTimeOperandInfo { // always 0. uint32_t numberOfUsesLeft; -#if 0 // REF-ANN Shape shape() const { return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint}; } -#endif }; // Used to keep a pointer to each of the memory pools. diff --git a/src/runtime/ref/nn/common/include/Operations.h b/src/runtime/ref/nn/common/include/Operations.h new file mode 100644 index 0000000..006772f --- /dev/null +++ b/src/runtime/ref/nn/common/include/Operations.h @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_H +#define ANDROID_ML_NN_COMMON_OPERATIONS_H + +#if 0 // REF-ANN +#include "operations/EmbeddingLookup.h" +#include "operations/HashtableLookup.h" +#include "operations/LSHProjection.h" +#include "operations/LSTM.h" +#include "operations/RNN.h" +#include "operations/SVDF.h" +#endif + +#include + +#include +#include + +namespace android { +namespace nn { + +struct Shape; + +bool addFloat32(const float* in1, const Shape& shape1, + const float* in2, const Shape& shape2, + int32_t activation, + float* out, const Shape& shapeOut); +bool addQuant8(const uint8_t* in1, const Shape& shape1, + const uint8_t* in2, const Shape& shape2, + int32_t activation, + uint8_t* out, const Shape& shapeOut); + +bool mulFloat32(const float* in1, const Shape& shape1, + const float* in2, const Shape& shape2, + int32_t activation, + float* out, const Shape& shapeOut); +bool mulQuant8(const uint8_t* in1, const Shape& shape1, + const uint8_t* in2, const Shape& shape2, + int32_t activation, + uint8_t* out, const Shape& shapeOut); + +bool floorFloat32(const float* inputData, + float* outputData, + const Shape& shape); + +bool dequantizeQuant8ToFloat32(const uint8_t* inputData, + float* outputData, + const Shape& shape); + +bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + float* outputData, const Shape& outputShape); +bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* filterData, const Shape& filterShape, + const int32_t* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t depth_multiplier, int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool convFloat32(const float* inputData, const Shape& inputShape, + const float* filterData, const Shape& filterShape, + const float* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const Shape& outputShape); +bool convQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* filterData, const Shape& filterShape, + const int32_t* biasData, const Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool averagePoolFloat32(const float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool averagePoolQuant8(const uint8_t* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + uint8_t* outputData, const Shape& outputShape); +bool l2PoolFloat32(const 
float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool maxPoolFloat32(const float* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + float* outputData, const Shape& outputShape); +bool maxPoolQuant8(const uint8_t* inputData, const Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool reluFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool relu1Float32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool relu6Float32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool tanhFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool logisticFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool softmaxFloat32(const float* inputData, const Shape& inputShape, + const float beta, + float* outputData, const Shape& outputShape); +bool reluQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool relu1Quant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool relu6Quant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool softmaxQuant8(const uint8_t* inputData, const Shape& inputShape, + const float beta, + uint8_t* outputData, const Shape& outputShape); + +bool fullyConnectedFloat32(const float* inputData, const Shape& inputShape, + const float* weights, const Shape& weightsShape, + const float* biasData, const Shape& biasShape, + int32_t activation, + float* outputData, const Shape& outputShape); +bool fullyConnectedQuant8(const uint8_t* inputData, const Shape& inputShape, + const uint8_t* weights, const Shape& weightsShape, + const int32_t* biasData, const Shape& biasShape, + int32_t activation, + uint8_t* outputData, const Shape& outputShape); + +bool concatenationFloat32(const std::vector& inputDataPtrs, + const std::vector& inputShapes, int32_t axis, + float* outputData, const Shape& outputShape); +bool concatenationQuant8(const std::vector& inputDataPtrs, + const std::vector& inputShapes, int32_t axis, + uint8_t* outputData, const Shape& outputShape); + +bool l2normFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape); +bool l2normQuant8(const uint8_t* inputData, const Shape& inputShape, + uint8_t* outputData, const Shape& outputShape); +bool localResponseNormFloat32(const float* inputData, const Shape& inputShape, + int32_t radius, float bias, float alpha, float beta, + float* outputData, const Shape& outputShape); + +bool 
reshapeGeneric(const void* inputData, const Shape& inputShape, + void* outputData, const Shape& outputShape); + +bool resizeBilinearFloat32(const float* inputData, + const Shape& inputShape, + float* outputData, + const Shape& outputShape); + +bool depthToSpaceGeneric(const uint8_t* inputData, const Shape& inputShape, + int32_t blockSize, + uint8_t* outputData, const Shape& outputShape); + +bool spaceToDepthGeneric(const uint8_t* inputData, const Shape& inputShape, + int32_t blockSize, + uint8_t* outputData, const Shape& outputShape); + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_H diff --git a/src/runtime/ref/nn/common/include/OperationsUtils.h b/src/runtime/ref/nn/common/include/OperationsUtils.h new file mode 100644 index 0000000..aaca0c0 --- /dev/null +++ b/src/runtime/ref/nn/common/include/OperationsUtils.h @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H +#define ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H + +#include "Utils.h" + +#include +#include + +// Macro to check if the input parameters for operation are valid or not. +#define NN_CHECK(v) \ + do { \ + if (!(v)) { \ + LOG(ERROR) << "NN_CHECK failed: " << #v << "'\n"; \ + return false; \ + } \ + } while(0); + +#define NN_CHECK_EQ(actual, expected) \ + NN_CHECK((actual) == (expected)) + +#define NN_OPS_CHECK NN_CHECK + +namespace android { +namespace nn { + +enum PaddingScheme { + kPaddingUnknown = 0, + kPaddingSame = 1, + kPaddingValid = 2, +}; + +// The type and dimensions of an operand. +struct Shape { + OperandType type; + std::vector dimensions; + float scale; + int32_t offset; +}; + +// Verifies that the two shapes are the same. +bool SameShape(const Shape& in1, const Shape& in2); + +// Sets out to the same shape as in. +bool SetShape(const Shape& in, Shape* out); + +// Return the total number of elements, i.e. all the dimensions multiplied +// together. For a scalar, returns one. 
+uint32_t getNumberOfElements(const Shape& shape); + +uint32_t getNumberOfDimensions(const Shape& shape); + +uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx); + +inline uint32_t computeOutSize(uint32_t imageSize, uint32_t filterSize, uint32_t stride, + uint32_t paddingHead, uint32_t paddingTail) { + return (imageSize - filterSize + stride + paddingHead + paddingTail) / stride; +} + +__wur +bool QuantizeMultiplierSmallerThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int32_t* right_shift); + +__wur +bool QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); + +__wur +bool GetQuantizedConvolutionMultipler(const Shape& inputShape, + const Shape& filterShape, + const Shape& biasShape, + const Shape& outputShape, + float* multiplier); + +void CalculateActivationRangeUint8(int32_t activation, + const Shape& outputShape, + int32_t* act_min, + int32_t* act_max); + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift); + +inline void calculateExplicitPadding(int32_t in_size, int32_t stride, + int32_t filter_size, int32_t padding_implicit, + int32_t* padding_head, int32_t* padding_tail) { + *padding_head = 0; + *padding_tail = 0; + + if (padding_implicit == kPaddingSame) { + int32_t out_size = (in_size + stride - 1) / stride; + int32_t tmp = (out_size - 1) * stride + filter_size; + if (tmp > in_size) { + *padding_head = (tmp - in_size) / 2; + *padding_tail = (tmp - in_size) - *padding_head; + } + } +} + +inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight, + int32_t strideWidth, int32_t strideHeight, + int32_t filterWidth, int32_t filterHeight, + int32_t paddingLeft, int32_t paddingRight, + int32_t paddingTop, int32_t paddingBottom) { + if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) { + return kPaddingValid; + } + + int32_t expectedPaddingLeft, expectedPaddingRight; + int32_t expectedPaddingTop, expectedPaddingBottom; + + calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame, + &expectedPaddingLeft, &expectedPaddingRight); + calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame, + &expectedPaddingTop, &expectedPaddingBottom); + if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight && + expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) { + return kPaddingSame; + } else { + return kPaddingUnknown; + } +} + +// Preparation functions for the corresponding ops +bool addMulPrepare(const Shape& in1, const Shape& in2, Shape* out1); + +bool floorPrepare(const Shape& input, Shape* output); + +bool dequantizePrepare(const Shape& input, Shape* output); + +bool depthwiseConvPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output); + +bool convPrepare(const Shape& input, + const Shape& filter, + const Shape& bias, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + Shape* output); + +bool genericPoolingPrepare(const Shape& input, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + Shape* output); + +bool genericActivationPrepare(const Shape& input, 
Shape* output); + +bool fullyConnectedPrepare(const Shape& input, + const Shape& weights, + const Shape& bias, + Shape* output); + +bool concatenationPrepare(const std::vector& inputShapes, + int32_t axis, + Shape* output); + +bool genericNormalizationPrepare(const Shape& input, Shape* output); + +bool reshapePrepare(const Shape& input, + const int32_t* targetDims, + const int32_t targetDimsSize, + Shape* output); + +bool resizeBilinearPrepare(const Shape& input, + int32_t height, + int32_t width, + Shape* output); + +bool depthToSpacePrepare(const Shape& input, + int32_t blockSize, + Shape* output); + +bool spaceToDepthPrepare(const Shape& input, + int32_t blockSize, + Shape* output); + +bool embeddingLookupPrepare(const Shape &valueShape, + const Shape &lookupShape, + Shape *outputShape); + +bool hashtableLookupPrepare(const Shape &lookupShape, + const Shape &keyShape, + const Shape &valueShape, + Shape *outputShape, + Shape *hitShape); + +#define ANDROID_NN_MACRO_DISPATCH(macro) \ + switch (activation) { \ + case (int32_t) FusedActivationFunc::NONE: \ + macro(kNone); \ + break; \ + case (int32_t) FusedActivationFunc::RELU: \ + macro(kRelu); \ + break; \ + case (int32_t) FusedActivationFunc::RELU1: \ + macro(kRelu1); \ + break; \ + case (int32_t) FusedActivationFunc::RELU6: \ + macro(kRelu6); \ + break; \ + default: \ + LOG(ERROR) << "Unsupported fused activation function type"; \ + return false; \ + } + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
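
Usage sketch (not part of the patch): the CpuExecutor hunk above re-enables `out.shape()` so that it can eventually feed the new prepare helpers, e.g. addMulPrepare(). The snippet below shows how that broadcast shape preparation behaves on its own; it assumes the headers added by this commit are on the include path, that OperandType and the LOG macro are visible through Utils.h (as the Shape struct implies), and that OperationsUtils.cpp is linked in. Note that the broadcast branch only fills out->dimensions, so the caller keeps its own output type.

// sketch_addmulprepare.cpp -- illustrative only, not part of this commit
#include "OperationsUtils.h"

#include <cstdint>
#include <cstdio>

using android::nn::OperandType;
using android::nn::Shape;
using android::nn::addMulPrepare;

int main() {
    Shape a;
    a.type = OperandType::TENSOR_FLOAT32;
    a.dimensions = {2, 1, 4};

    Shape b;
    b.type = OperandType::TENSOR_FLOAT32;
    b.dimensions = {1, 3, 4};

    Shape out;
    out.type = OperandType::TENSOR_FLOAT32;  // addMulPrepare leaves type to the caller

    // Shapes differ, so addMulPrepare takes its broadcast branch and emits the
    // element-wise maximum of the trailing dimensions: {2,1,4} + {1,3,4} -> {2,3,4}.
    if (!addMulPrepare(a, b, &out)) {
        std::fprintf(stderr, "addMulPrepare failed\n");
        return 1;
    }
    for (uint32_t d : out.dimensions) {
        std::printf("%u ", d);  // expected: 2 3 4
    }
    std::printf("\n");
    return 0;
}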
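
The implicit-padding helpers added to OperationsUtils.h are header-only inline functions, so they can be exercised without the rest of the runtime. A small sketch (geometry values invented for illustration) of how calculateExplicitPadding() and computeOutSize() combine when an operation specifies SAME padding:

// sketch_padding.cpp -- illustrative only, not part of this commit
#include "OperationsUtils.h"

#include <cstdint>
#include <cstdio>

int main() {
    const int32_t inWidth = 224, filterWidth = 3, strideWidth = 2;

    int32_t padLeft = 0, padRight = 0;
    // Derive the explicit head/tail padding that SAME padding implies
    // for this input size, filter size and stride.
    android::nn::calculateExplicitPadding(inWidth, strideWidth, filterWidth,
                                          android::nn::kPaddingSame,
                                          &padLeft, &padRight);

    // With that padding, the output extent is ceil(inWidth / strideWidth).
    uint32_t outWidth = android::nn::computeOutSize(inWidth, filterWidth, strideWidth,
                                                    padLeft, padRight);

    std::printf("pad=(%d,%d) outWidth=%u\n", padLeft, padRight, outWidth);
    // expected: pad=(0,1) outWidth=112
    return 0;
}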