TfLiteTensor* t = &context->tensors[node->inputs->data[i]];
TF_LITE_ENSURE_EQ(context, t->dims->size, t0->dims->size);
TF_LITE_ENSURE_EQ(context, t->type, input_type);
- if (input_type == kTfLiteUInt8) {
- TF_LITE_ENSURE_EQ(context, t->params.zero_point, t0->params.zero_point);
- TF_LITE_ENSURE_EQ(context, t->params.scale, t0->params.scale);
- }
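+    // Inputs need not share quantization parameters; the quantized kernel
+    // requantizes each input into the output's scale and zero point.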
for (int d = 0; d < t0->dims->size; ++d) {
if (d == axis) {
sum_axis += t->dims->data[axis];
TfLiteTensor* output = &context->tensors[node->outputs->data[0]];
TF_LITE_ENSURE_EQ(context, output->type, input_type);
- if (input_type == kTfLiteUInt8) {
- TF_LITE_ENSURE_EQ(context, output->params.zero_point,
- t0->params.zero_point);
- TF_LITE_ENSURE_EQ(context, output->params.scale, t0->params.scale);
- }
return context->ResizeTensor(context, output, output_size);
}
all_inputs.dims(), node->inputs->size, GetTensorData<scalar>(output), \
GetTensorDims(output))
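+// Like TF_LITE_CONCATENATION above, but also forwards each input's zero point
+// and scale, plus the output's quantization parameters, to the kernel.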
+#define TF_LITE_CONCATENATION_QUANTIZED(type) \
+ VectorOfQuantizedTensors all_inputs(*context, *node->inputs); \
+ type::Concatenation( \
+ RemapDim(NumDimensions(output), axis), all_inputs.data(), \
+ all_inputs.dims(), all_inputs.zero_point(), all_inputs.scale(), \
+ node->inputs->size, GetTensorData<uint8>(output), GetTensorDims(output), \
+ output->params.zero_point, output->params.scale)
+
switch (output->type) { // Already know in/out types are the same.
case kTfLiteFloat32:
if (kernel_type == kReference) {
break;
case kTfLiteUInt8:
if (kernel_type == kReference) {
- TF_LITE_CONCATENATION(reference_ops, uint8_t);
+ TF_LITE_CONCATENATION_QUANTIZED(reference_ops);
} else {
- TF_LITE_CONCATENATION(optimized_ops, uint8_t);
+ TF_LITE_CONCATENATION_QUANTIZED(optimized_ops);
}
break;
default:
return kTfLiteError;
}
+#undef TF_LITE_CONCATENATION_QUANTIZED
#undef TF_LITE_CONCATENATION
return kTfLiteOk;
public:
// TODO(ahentz): Also test different activation types, axis, input
// dimensions.
+ BaseConcatenationOpModel() {}
BaseConcatenationOpModel(const TensorData& input_template, int axis,
int num_inputs) {
std::vector<std::vector<int>> all_input_shapes;
class QuantizedConcatenationOpModel : public BaseConcatenationOpModel {
public:
using BaseConcatenationOpModel::BaseConcatenationOpModel;
+ QuantizedConcatenationOpModel(const std::vector<TensorData>& input_template,
+ int axis, int num_inputs,
+ const TensorData& output_template) {
+ std::vector<std::vector<int>> all_input_shapes;
+ CHECK_EQ(input_template.size(), num_inputs);
+ for (int i = 0; i < num_inputs; ++i) {
+ all_input_shapes.push_back(input_template[i].shape);
+ AddInput(input_template[i]);
+ }
+ output_ = AddOutput({output_template.type, /*shape=*/{},
+ output_template.min, output_template.max});
+ SetBuiltinOp(
+ BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions,
+ CreateConcatenationOptions(builder_, axis, ActivationFunctionType_NONE)
+ .Union());
+ BuildInterpreter(all_input_shapes);
+ }
void SetInput(int index, std::initializer_list<float> data) {
QuantizeAndPopulate<uint8_t>(index, data);
}
}));
}
+TEST(ConcatenationOpTest, FourInputsQuantizedMixedRange) {
+ QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8},
+ {TensorType_UINT8, {2, 1, 2}, 0, 12.8},
+ {TensorType_UINT8, {2, 1, 2}, -11, 11.8},
+ {TensorType_UINT8, {2, 1, 2}, 0, 7.4}},
+ /*axis=*/2, /*num_inputs=*/4,
+ {TensorType_UINT8, {2, 1, 2}, -12.7, 12.8});
+
+ m0.SetInput(0, {1.0f, 3.0f, 4.0f, 7.0f});
+ m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f});
+ m0.SetInput(2, {1.2f, 3.2f, 4.2f, 7.2f});
+ m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f});
+ m0.Invoke();
+ EXPECT_THAT(m0.GetDequantizedOutput(),
+ ElementsAreArray(ArrayFloatNear({
+ 1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, //
+ 4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, //
+ })));
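+  // With an output range of [-12.7, 12.8], the output scale is 0.1 and the
+  // zero point is 127, so e.g. 1.0f quantizes to 1.0 / 0.1 + 127 = 137.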
+ EXPECT_THAT(m0.GetOutput(), ElementsAreArray({
+ 137, 157, 138, 158, 139, 159, 140, 160, //
+ 167, 197, 168, 198, 169, 199, 170, 200, //
+ }));
+}
+
+TEST(ConcatenationOpTest, FourInputsQuantizedMixedRangeClampingLogic) {
+ QuantizedConcatenationOpModel m0({{TensorType_UINT8, {2, 1, 2}, -10.7, 10.8},
+ {TensorType_UINT8, {2, 1, 2}, 0, 12.8},
+ {TensorType_UINT8, {2, 1, 2}, -11, 11.8},
+ {TensorType_UINT8, {2, 1, 2}, 0, 7.4}},
+ /*axis=*/2, /*num_inputs=*/4,
+ {TensorType_UINT8, {2, 1, 2}, -1., 1.});
+
+ m0.SetInput(0, {1.0f, -3.0f, -4.0f, -7.0f});
+ m0.SetInput(1, {1.1f, 3.1f, 4.1f, 7.1f});
+ m0.SetInput(2, {1.2f, -3.2f, -4.2f, 7.2f});
+ m0.SetInput(3, {1.3f, 3.3f, 4.3f, 7.3f});
+ m0.Invoke();
+ EXPECT_THAT(m0.GetDequantizedOutput(),
+ ElementsAreArray(ArrayFloatNear(
+ {
+ 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, //
+ -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, //
+ },
+ 4e-3)));
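+  // Inputs that fall outside the narrow output range [-1, 1] saturate to the
+  // uint8 limits 0 and 255 after requantization.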
+ EXPECT_THAT(m0.GetOutput(), ElementsAreArray({
+ 255, 0, 255, 255, 255, 0, 255, 255, //
+ 0, 0, 255, 255, 0, 255, 255, 255, //
+ }));
+}
+
} // namespace
} // namespace tflite
}
}
+// TODO(prabhumk): This is the same as the reference implementation.
+// TODO(prabhumk): The quantized implementation of concatenation isn't fully
+// quantized as it takes scale as a floating-point value. This should be fixed
+// when optimizing this routine further.
+inline void Concatenation(int concat_dim, const uint8* const* input_data,
+ const Dims<4>* const* input_dims,
+ const int32* input_zeropoint,
+ const float* input_scale, int inputs_count,
+ uint8* output_data, const Dims<4>& output_dims,
+ const int32 output_zeropoint,
+ const float output_scale) {
+  // The arguments input_zeropoint and input_scale are expected to be arrays
+  // that hold the quantization parameters for all the inputs to the concat
+  // operator.
+ gemmlowp::ScopedProfilingLabel label("Concatenation");
+ TFLITE_DCHECK_GT(inputs_count, 1);
+ int concat_size = 0;
+ for (int i = 0; i < inputs_count; i++) {
+ for (int j = 0; j < 4; j++) {
+ if (j != concat_dim) {
+ MatchingArraySize(*input_dims[i], j, output_dims, j);
+ }
+ }
+ concat_size += ArraySize(*input_dims[i], concat_dim);
+ }
+ TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim));
+ int outer_size = 1;
+ for (int i = concat_dim + 1; i < 4; i++) {
+ outer_size *= output_dims.sizes[i];
+ }
+ const float inverse_output_scale = 1.f / output_scale;
+ uint8* output_ptr = output_data;
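+  // Dims<4> orders dimensions innermost-first, so for each outer index every
+  // input contributes one contiguous run of
+  // sizes[concat_dim] * strides[concat_dim] elements.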
+ for (int k = 0; k < outer_size; k++) {
+ for (int i = 0; i < inputs_count; ++i) {
+ const int copy_size =
+ input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim];
+ const uint8* input_ptr = input_data[i] + k * copy_size;
+ if (input_zeropoint[i] == output_zeropoint &&
+ input_scale[i] == output_scale) {
+ memcpy(output_ptr, input_ptr, copy_size);
+ } else {
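+        // Requantize: real_value = input_scale * (q - input_zeropoint), and
+        // q_out = round(real_value / output_scale) + output_zeropoint. The two
+        // affine maps are folded into a single scale and bias below.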
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ for (int j = 0; j < copy_size; ++j) {
+ const int32_t value =
+ static_cast<int32_t>(round(input_ptr[j] * scale + bias)) +
+ output_zeropoint;
+ output_ptr[j] =
+ static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
template <FusedActivationFunctionType Ac, typename Scalar>
void DepthConcatenation(const Scalar* const* input_data,
const Dims<4>* const* input_dims, int inputs_count,
}
}
+// TODO(prabhumk): This is the same as the optimized implementation.
+// TODO(prabhumk): The quantized implementation of concatenation isn't fully
+// quantized as it takes scale as a floating-point value. This should be fixed
+// when optimizing this routine further.
+inline void Concatenation(int concat_dim, const uint8* const* input_data,
+ const Dims<4>* const* input_dims,
+ const int32* input_zeropoint,
+ const float* input_scale, int inputs_count,
+ uint8* output_data, const Dims<4>& output_dims,
+ const int32 output_zeropoint,
+ const float output_scale) {
+  // The arguments input_zeropoint and input_scale are expected to be arrays
+  // that hold the quantization parameters for all the inputs to the concat
+  // operator.
+ TFLITE_DCHECK_GT(inputs_count, 1);
+ int64_t concat_size = 0;
+ for (int i = 0; i < inputs_count; i++) {
+ for (int j = 0; j < 4; j++) {
+ if (j != concat_dim) {
+ MatchingArraySize(*input_dims[i], j, output_dims, j);
+ }
+ }
+ concat_size += ArraySize(*input_dims[i], concat_dim);
+ }
+ TFLITE_DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim));
+ int64_t outer_size = 1;
+ for (int i = concat_dim + 1; i < 4; i++) {
+ outer_size *= output_dims.sizes[i];
+ }
+ const float inverse_output_scale = 1.f / output_scale;
+ uint8* output_ptr = output_data;
+ for (int k = 0; k < outer_size; k++) {
+ for (int i = 0; i < inputs_count; ++i) {
+ const int copy_size =
+ input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim];
+ const uint8* input_ptr = input_data[i] + k * copy_size;
+ if (input_zeropoint[i] == output_zeropoint &&
+ input_scale[i] == output_scale) {
+ memcpy(output_ptr, input_ptr, copy_size);
+ } else {
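+        // Requantize: real_value = input_scale * (q - input_zeropoint), and
+        // q_out = round(real_value / output_scale) + output_zeropoint. The two
+        // affine maps are folded into a single scale and bias below.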
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ for (int j = 0; j < copy_size; ++j) {
+ const int32_t value =
+ static_cast<int32_t>(round(input_ptr[j] * scale + bias)) +
+ output_zeropoint;
+ output_ptr[j] =
+ static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
template <FusedActivationFunctionType Ac, typename Scalar>
void DepthConcatenation(const Scalar* const* input_data,
const Dims<4>* const* input_dims, int inputs_count,
std::vector<Dims<4>*> all_dims_ptr_;
};
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
+ public:
+ // Build with the tensors in 'tensor_list'.
+ VectorOfQuantizedTensors(const TfLiteContext& context,
+ const TfLiteIntArray& tensor_list)
+ : VectorOfTensors<uint8>(context, tensor_list) {
+ for (int i = 0; i < tensor_list.size; ++i) {
+ TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
+ zero_point_.push_back(t->params.zero_point);
+ scale_.push_back(t->params.scale);
+ }
+ }
+
+ const float* scale() const { return scale_.data(); }
+ const int32* zero_point() const { return zero_point_.data(); }
+
+ private:
+ std::vector<int32> zero_point_;
+ std::vector<float> scale_;
+};
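+// A minimal usage sketch, mirroring how the concatenation kernel drives the
+// quantized reference/optimized ops ('context', 'node', 'axis', and 'output'
+// come from the calling kernel):
+//
+//   VectorOfQuantizedTensors all_inputs(*context, *node->inputs);
+//   optimized_ops::Concatenation(
+//       RemapDim(NumDimensions(output), axis), all_inputs.data(),
+//       all_inputs.dims(), all_inputs.zero_point(), all_inputs.scale(),
+//       node->inputs->size, GetTensorData<uint8>(output),
+//       GetTensorDims(output), output->params.zero_point,
+//       output->params.scale);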
+
} // namespace tflite
#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_