This commit implements the SoftMax kernel for the CPU backend in the new runtime.
It also adds the helpers used by the quantized path:
- QuantizeMultiplierGreaterThanOne, CalculateInputRadius
Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
#include "internal/kernels/cpufallback/ConcatLayer.h"
#include "internal/kernels/cpufallback/FullyConnectedLayer.h"
#include "internal/kernels/cpufallback/ReshapeLayer.h"
+#include "internal/kernels/cpufallback/SoftMaxLayer.h"
#include "logging.h"
Stage StageGenerator::generate(const ::internal::tflite::op::Softmax::Node &node)
{
- throw std::runtime_error("NYI");
+ VERBOSE(Softmax) << "generate CPU Softmax" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index scale_index{node.param().scale_index};
+
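+ // Plain-value snapshot of everything the deferred stage needs (operand indices, shapes,
+ // and the beta scale), so the lambda below does not keep references into the context.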
+ struct Param
+ {
+ int output_index;
+ int input_index;
+
+ ::internal::tflite::operand::Shape ofm_shape{1};
+ ::internal::tflite::operand::Shape ifm_shape{1};
+
+ float scale;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ param.ofm_shape = _ctx.at(output_index).shape();
+ param.ifm_shape = _ctx.at(input_index).shape();
+
+ param.scale = _ctx.at(scale_index).asScalar<float>();
+
+ auto tensors = _tensor_builder;
+
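+ // Deferred stage: when the execution plan is built, look up the allocated tensors and
+ // append a configured SoftMaxLayer to the execution builder.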
+ return [tensors, param](IExecutionBuilder &builder) {
+ auto output_alloc = tensors->at(::internal::tflite::operand::Index{param.output_index}).get();
+ auto input_alloc = tensors->at(::internal::tflite::operand::Index{param.input_index}).get();
+
+ std::unique_ptr<::internal::kernels::cpu::SoftMaxLayer> fn{
+ new ::internal::kernels::cpu::SoftMaxLayer};
+
+ fn->configure(input_alloc->buffer(), param.ifm_shape, param.scale, output_alloc->buffer(),
+ param.ofm_shape);
+
+ builder.append(std::move(fn));
+ };
}
} // namespace stage
return true;
}
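+// Decomposes a real multiplier greater than one into a 32-bit fixed-point mantissa and a
+// left shift such that double_multiplier ~= quantized_multiplier * 2^(left_shift - 31).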
+bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier > 1.);
+ const double q = std::frexp(double_multiplier, left_shift);
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*left_shift;
+ }
+ assert(*left_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+ return true;
+}
+
void CalculateActivationRangeFloat(int32_t activation, float *activation_min, float *activation_max)
{
if (activation == ANEURALNETWORKS_FUSED_RELU)
}
}
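+// Returns the largest input difference (in quantized units) whose rescaled value still fits
+// into the fixed-point range used by the quantized softmax; callers negate it to get diff_min.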
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
Shape convertShape(const ::internal::tflite::operand::Shape &o)
{
Shape shape;
__wur bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape,
const Shape &biasShape, const Shape &outputShape,
float *multiplier);
+__wur bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
void CalculateActivationRangeFloat(int32_t activation, float *activation_min,
float *activation_max);
void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min,
int32_t *act_max);
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
Shape convertShape(const ::internal::tflite::operand::Shape &o);
uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
--- /dev/null
+#include "SoftMaxLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+bool SoftMaxLayer::softmaxFloat32()
+{
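+ // The optimized TFLite kernel operates on 4-D dims, so a 2-D (batch, input_size) tensor is
+ // viewed as (batch, 1, 1, input_size).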
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cerr << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
+ ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
+ reinterpret_cast<float *>(_outputData), dim);
+ return true;
+}
+
+bool SoftMaxLayer::softmaxQuant8()
+{
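+ // Same 2-D to 4-D reshape as in softmaxFloat32().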
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cerr << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
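+ // The quantized softmax writes its output with a fixed scale of 1/256 and zero offset, so
+ // any other output quantization is rejected.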
+ if (_outputShape.offset != 0 || _outputShape.scale != 1.f / 256)
+ {
+ std::cout << "incorrect scale / offset for output" << std::endl;
+ return false;
+ }
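+ // Fold beta and the input scale into one real multiplier, reserving kScaledDiffIntegerBits
+ // integer bits for the scaled difference, then convert it to fixed point below.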
+ static const int32_t kScaledDiffIntegerBits = 5;
+ const double input_beta_real_multiplier = std::min(
+ 1.0 * _beta * _inputShape.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
+ &input_left_shift))
+ {
+ return false;
+ }
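+ // diff_min is the negative input radius: inputs farther than this below the row maximum
+ // contribute (effectively) zero probability.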
+ float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
+ ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
+ _outputData, dim);
+ return true;
+}
+
+void SoftMaxLayer::configure(uint8_t *inputData,
+ const ::internal::tflite::operand::Shape &inputShape, const float beta,
+ uint8_t *outputData,
+ const ::internal::tflite::operand::Shape &outputShape)
+{
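+ // Only record buffers, converted shapes, and beta here; the kernel is chosen by the input
+ // type in run().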
+ _inputData = inputData;
+ _inputShape = convertShape(inputShape);
+ _inputType = inputShape.type();
+ _outputData = outputData;
+ _outputShape = convertShape(outputShape);
+ _beta = beta;
+}
+
+void SoftMaxLayer::run()
+{
+ if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_FLOAT32))
+ {
+ softmaxFloat32();
+ }
+ else if (_inputType == static_cast<uint32_t>(OperandType::TENSOR_QUANT8_ASYMM))
+ {
+ softmaxQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
--- /dev/null
+#ifndef __INTERNAL_KERNELS_CPU_SOFTMAXLAYER_H__
+#define __INTERNAL_KERNELS_CPU_SOFTMAXLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/kernels/cpufallback/OperationUtils.h"
+
+namespace internal
+{
+namespace kernels
+{
+namespace cpu
+{
+
+class SoftMaxLayer : public ::arm_compute::IFunction
+{
+public:
+ SoftMaxLayer() : _inputData(nullptr), _outputData(nullptr), _beta(0.0f), _inputType(0) {}
+
+public:
+ bool softmaxFloat32();
+
+ bool softmaxQuant8();
+
+ void configure(uint8_t *inputData, const ::internal::tflite::operand::Shape &inputShape,
+ const float beta, uint8_t *outputData,
+ const ::internal::tflite::operand::Shape &outputShape);
+
+ void run() override;
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ float _beta;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ int32_t _inputType;
+};
+
+} // namespace cpu
+} // namespace kernels
+} // namespace internal
+
+#endif // __INTERNAL_KERNELS_CPU_SOFTMAXLAYER_H__