From cfeadf0986ad60b0ae1eb18b3802539803c63b94 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Apr 2018 17:08:50 -0700 Subject: [PATCH] Make concat handler support mixed range input PiperOrigin-RevId: 191822664 --- tensorflow/contrib/lite/kernels/concatenation.cc | 22 +++---- .../contrib/lite/kernels/concatenation_test.cc | 68 ++++++++++++++++++++++ .../kernels/internal/optimized/optimized_ops.h | 56 ++++++++++++++++++ .../kernels/internal/reference/reference_ops.h | 55 +++++++++++++++++ tensorflow/contrib/lite/kernels/internal/tensor.h | 23 ++++++++ 5 files changed, 213 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/concatenation.cc b/tensorflow/contrib/lite/kernels/concatenation.cc index a619ada..45ea8d0 100644 --- a/tensorflow/contrib/lite/kernels/concatenation.cc +++ b/tensorflow/contrib/lite/kernels/concatenation.cc @@ -67,10 +67,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* t = &context->tensors[node->inputs->data[i]]; TF_LITE_ENSURE_EQ(context, t->dims->size, t0->dims->size); TF_LITE_ENSURE_EQ(context, t->type, input_type); - if (input_type == kTfLiteUInt8) { - TF_LITE_ENSURE_EQ(context, t->params.zero_point, t0->params.zero_point); - TF_LITE_ENSURE_EQ(context, t->params.scale, t0->params.scale); - } for (int d = 0; d < t0->dims->size; ++d) { if (d == axis) { sum_axis += t->dims->data[axis]; @@ -87,11 +83,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; TF_LITE_ENSURE_EQ(context, output->type, input_type); - if (input_type == kTfLiteUInt8) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, - t0->params.zero_point); - TF_LITE_ENSURE_EQ(context, output->params.scale, t0->params.scale); - } return context->ResizeTensor(context, output, output_size); } @@ -115,6 +106,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { all_inputs.dims(), node->inputs->size, 
GetTensorData(output), \ GetTensorDims(output)) +#define TF_LITE_CONCATENATION_QUANTIZED(type) \ + VectorOfQuantizedTensors all_inputs(*context, *node->inputs); \ + type::Concatenation( \ + RemapDim(NumDimensions(output), axis), all_inputs.data(), \ + all_inputs.dims(), all_inputs.zero_point(), all_inputs.scale(), \ + node->inputs->size, GetTensorData(output), GetTensorDims(output), \ + output->params.zero_point, output->params.scale) + switch (output->type) { // Already know in/outtypes are same. case kTfLiteFloat32: if (kernel_type == kReference) { @@ -125,9 +124,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { break; case kTfLiteUInt8: if (kernel_type == kReference) { - TF_LITE_CONCATENATION(reference_ops, uint8_t); + TF_LITE_CONCATENATION_QUANTIZED(reference_ops); } else { - TF_LITE_CONCATENATION(optimized_ops, uint8_t); + TF_LITE_CONCATENATION_QUANTIZED(optimized_ops); } break; default: @@ -136,6 +135,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } +#undef TF_LITE_CONCATENATION_QUANTIZED #undef TF_LITE_CONCATENATION return kTfLiteOk; diff --git a/tensorflow/contrib/lite/kernels/concatenation_test.cc b/tensorflow/contrib/lite/kernels/concatenation_test.cc index ba1ffc5..467ff6f 100644 --- a/tensorflow/contrib/lite/kernels/concatenation_test.cc +++ b/tensorflow/contrib/lite/kernels/concatenation_test.cc @@ -28,6 +28,7 @@ class BaseConcatenationOpModel : public SingleOpModel { public: // TODO(ahentz): Also test different activation types, axis, input // dimensions. 
+ BaseConcatenationOpModel() {} BaseConcatenationOpModel(const TensorData& input_template, int axis, int num_inputs) { std::vector> all_input_shapes; @@ -60,6 +61,23 @@ class ConcatenationOpModel : public BaseConcatenationOpModel { class QuantizedConcatenationOpModel : public BaseConcatenationOpModel { public: using BaseConcatenationOpModel::BaseConcatenationOpModel; + QuantizedConcatenationOpModel(const std::vector& input_template, + int axis, int num_inputs, + const TensorData& output_template) { + std::vector> all_input_shapes; + CHECK_EQ(input_template.size(), num_inputs); + for (int i = 0; i < num_inputs; ++i) { + all_input_shapes.push_back(input_template[i].shape); + AddInput(input_template[i]); + } + output_ = AddOutput({output_template.type, /*shape=*/{}, + output_template.min, output_template.max}); + SetBuiltinOp( + BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions, + CreateConcatenationOptions(builder_, axis, ActivationFunctionType_NONE) + .Union()); + BuildInterpreter(all_input_shapes); + } void SetInput(int index, std::initializer_list data) { QuantizeAndPopulate(index, data); } @@ -168,6 +186,56 @@ TEST(ConcatenationOpTest, FourInputsQuantized) { })); } +TEST(ConcatenationOpTest, FourInputsQuantizedMixedRange) { + QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 12.8}, + {TensorType_UINT8, {2, 1, 2}, -11, 11.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 7.4}}, + /*axis=*/2, /*num_inputs=*/4, + {TensorType_UINT8, {2, 1, 2}, -12.7, 12.8}); + + m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, 3.2f, 4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({ + 1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, // + 4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, // + }))); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({ + 137, 157, 138, 
158, 139, 159, 140, 160, // + 167, 197, 168, 198, 169, 199, 170, 200, // + })); +} + +TEST(ConcatenationOpTest, FourInputsQuantizedMixedRangeClampingLogic) { + QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 12.8}, + {TensorType_UINT8, {2, 1, 2}, -11, 11.8}, + {TensorType_UINT8, {2, 1, 2}, 0, 7.4}}, + /*axis=*/2, /*num_inputs=*/4, + {TensorType_UINT8, {2, 1, 2}, -1., 1.}); + + m0.SetInput(0, {1.0f, -3.0f, -4.0f, -7.0f}); + m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f}); + m0.SetInput(2, {1.2f, -3.2f, -4.2f, 7.2f}); + m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f}); + m0.Invoke(); + EXPECT_THAT(m0.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, // + -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, // + }, + 4e-3))); + EXPECT_THAT(m0.GetOutput(), ElementsAreArray({ + 255, 0, 255, 255, 255, 0, 255, 255, // + 0, 0, 255, 255, 0, 255, 255, 255, // + })); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 3642da3..9a27461 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2732,6 +2732,62 @@ void Concatenation(int concat_dim, const Scalar* const* input_data, } } +// TODO(prabhumk): This is the same as the reference implementation. +// TODO(prabhumk): The quantized implementation of concatenation isn't fully +// quantized as it takes scale as a floating point value. This should be fixed +// when optimizing this routine further. 
+inline void Concatenation(int concat_dim, const uint8* const* input_data, + const Dims<4>* const* input_dims, + const int32* input_zeropoint, + const float* input_scale, int inputs_count, + uint8* output_data, const Dims<4>& output_dims, + const int32 output_zeropoint, + const float output_scale) { + // The arguments input_zeropoint and input_scale are expected to be an array + // that has the quantization parameters for all the inputs to the concat + // operator. + gemmlowp::ScopedProfilingLabel label("Concatenation"); + TFLITE_DCHECK_GT(inputs_count, 1); + int concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + int outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + const float inverse_output_scale = 1.f / output_scale; + uint8* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + const uint8* input_ptr = input_data[i] + k * copy_size; + if (input_zeropoint[i] == output_zeropoint && + input_scale[i] == output_scale) { + memcpy(output_ptr, input_ptr, copy_size); + } else { + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + for (int j = 0; j < copy_size; ++j) { + const int32_t value = + static_cast<int32_t>(round(input_ptr[j] * scale + bias)) + + output_zeropoint; + output_ptr[j] = + static_cast<uint8_t>(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + template void DepthConcatenation(const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git 
a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 3575974..31e190e 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1566,6 +1566,61 @@ void Concatenation(int concat_dim, const Scalar* const* input_data, } } +// TODO(prabhumk): This is the same as the optimized implementation. +// TODO(prabhumk): The quantized implementation of concatenation isn't fully +// quantized as it takes scale as a floating point value. This should be fixed +// when optimizing this routine further. +inline void Concatenation(int concat_dim, const uint8* const* input_data, + const Dims<4>* const* input_dims, + const int32* input_zeropoint, + const float* input_scale, int inputs_count, + uint8* output_data, const Dims<4>& output_dims, + const int32 output_zeropoint, + const float output_scale) { + // The arguments input_zeropoint and input_scale are expected to be an array + // that has the quantization parameters for all the inputs to the concat + // operator. 
+ TFLITE_DCHECK_GT(inputs_count, 1); + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + int64_t outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + const float inverse_output_scale = 1.f / output_scale; + uint8* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + const uint8* input_ptr = input_data[i] + k * copy_size; + if (input_zeropoint[i] == output_zeropoint && + input_scale[i] == output_scale) { + memcpy(output_ptr, input_ptr, copy_size); + } else { + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + for (int j = 0; j < copy_size; ++j) { + const int32_t value = + static_cast<int32_t>(round(input_ptr[j] * scale + bias)) + + output_zeropoint; + output_ptr[j] = + static_cast<uint8_t>(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + template void DepthConcatenation(const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h index 62e38e0..4bce2ff 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -126,6 +126,29 @@ class VectorOfTensors { std::vector*> all_dims_ptr_; }; +// A list of quantized tensors in a format that can be used by kernels like +// split and concatenation. +class VectorOfQuantizedTensors : public VectorOfTensors<uint8> { + public: + // Build with the tensors in 'tensor_list'. 
+ VectorOfQuantizedTensors(const TfLiteContext& context, + const TfLiteIntArray& tensor_list) + : VectorOfTensors<uint8>(context, tensor_list) { + for (int i = 0; i < tensor_list.size; ++i) { + TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; + zero_point_.push_back(t->params.zero_point); + scale_.push_back(t->params.scale); + } + } + + const float* scale() const { return scale_.data(); } + const int32* zero_point() const { return zero_point_.data(); } + + private: + std::vector<int32> zero_point_; + std::vector<float> scale_; +}; + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ -- 2.7.4