{
switch (dataType)
{
- case DataType::Float16: return "Float16";
- case DataType::Float32: return "Float32";
- case DataType::QuantisedAsymm8: return "QAsymm8";
- case DataType::QuantisedSymm16: return "QSymm16";
- case DataType::Signed32: return "Signed32";
- case DataType::Boolean: return "Boolean";
+ case DataType::Float16: return "Float16";
+ case DataType::Float32: return "Float32";
+ case DataType::QuantisedAsymm8: return "QAsymm8";
+ case DataType::QuantizedSymm8PerAxis: return "QSymm8PerAxis";
+ case DataType::QuantisedSymm16: return "QSymm16";
+ case DataType::Signed32: return "Signed32";
+ case DataType::Boolean: return "Boolean";
default:
return "Unknown";
return uAxis;
}
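+// Returns the product of the dimensions of 'shape' from 'axis' (inclusive) up
+// to the last dimension. For example, for a shape of [2, 3, 4] and axis 1 this
+// returns 3 * 4 = 12.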
+unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int axis)
+{
+ unsigned int numDim = shape.GetNumDimensions();
+    BOOST_ASSERT(axis < numDim);
+ unsigned int count = 1;
+ for (unsigned int i = axis; i < numDim; i++)
+ {
+ count *= shape[i];
+ }
+ return count;
+}
+
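+// Returns the per-axis quantization parameters of 'info': the axis factor,
+// computed by GetNumElementsAfter over the quantization dimension, together
+// with the vector of per-axis scales. Throws InvalidArgumentException if the
+// tensor has no per-axis quantization parameters set.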
+std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info)
+{
+ const std::vector<float>& scales = info.GetQuantizationScales();
+ armnn::Optional<unsigned int> quantizationDim = info.GetQuantizationDim();
+    if (scales.empty() || !quantizationDim.has_value())
+ {
+ throw armnn::InvalidArgumentException(
+ std::string("Per-axis quantization params not set for tensor of type ") +
+ armnn::GetDataTypeName(info.GetDataType()), CHECK_LOCATION());
+ }
+ unsigned int axisFactor = GetNumElementsAfter(info.GetShape(), quantizationDim.value());
+
+ return { axisFactor, scales };
+}
+
} // namespace armnnUtils
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis);
-inline unsigned int GetNumElementsAfter(const armnn::TensorShape& shape,
- unsigned int axis)
-{
- unsigned int numDim = shape.GetNumDimensions();
- BOOST_ASSERT(0 >= axis);
- BOOST_ASSERT(axis < numDim - 1);
- unsigned int count = 1;
- for (unsigned int i = axis; i < numDim; i++)
- {
- count *= shape[i];
- }
- return count;
-}
-
-inline std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info)
-{
- const std::vector<float>& scales = info.GetQuantizationScales();
- armnn::Optional<unsigned int> quantizationDim = info.GetQuantizationDim();
- if (scales.size() < 1 || !quantizationDim.has_value())
- {
- throw armnn::InvalidArgumentException(
- "We currently support only per-axis symmetric quantization for QuantizedSymm8.");
- }
- unsigned int axisFactor = GetNumElementsAfter(info.GetShape(), quantizationDim.value());
+unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int axis);
- return {axisFactor, scales};
-}
+std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info);
} // namespace armnnUtils
std::vector<float> m_Scale;
};
-} //namespace armnn
\ No newline at end of file
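+// Decodes int32 data that carries one quantization scale per axis (e.g.
+// per-axis quantized convolution biases). The PerAxisIterator base tracks
+// m_AxisIndex, which selects the scale that applies to the element currently
+// referenced by m_Iterator.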
+class ScaledInt32PerAxisDecoder : public PerAxisIterator<const int32_t, Decoder<float>>
+{
+public:
+ ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector<float>& scales, unsigned int axisFactor)
+ : PerAxisIterator(data, axisFactor), m_Scales(scales) {}
+
+ float Get() const override
+ {
+ return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0);
+ }
+
+ // Get scale of the current value
+ float GetScale() const
+ {
+ return m_Scales[m_AxisIndex];
+ }
+
+private:
+ std::vector<float> m_Scales;
+};
+
+} // namespace armnn
namespace armnn
{
+namespace
+{
+
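+// Builds a ScaledInt32PerAxisDecoder from the per-axis quantization
+// parameters (scales and axis factor) stored in 'info'.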
+inline std::unique_ptr<Decoder<float>> MakeSigned32PerAxisDecoder(const TensorInfo& info, const void* data)
+{
+ auto params = armnnUtils::GetPerAxisParams(info);
+ return std::make_unique<ScaledInt32PerAxisDecoder>(
+ static_cast<const int32_t*>(data),
+ params.second,
+ params.first);
+}
+
+inline std::unique_ptr<Decoder<float>> MakeSigned32Decoder(const TensorInfo& info, const void* data)
+{
+    if (info.HasMultipleQuantizationScales())
+ {
+ // NOTE: If we have multiple quantization scales, we create a ScaledInt32PerAxisDecoder.
+ // This will be used to decode per-axis quantized convolution biases.
+ return MakeSigned32PerAxisDecoder(info, data);
+ }
+ else
+ {
+ if (info.GetQuantizationDim().has_value())
+ {
+ // NOTE: Even though we only have a single quantization scale, if the quantization
+ // dimension is set, the tensor has per-axis quantization and we need to create a
+            // ScaledInt32PerAxisDecoder.
+ return MakeSigned32PerAxisDecoder(info, data);
+ }
+
+ const float scale = info.GetQuantizationScale();
+ if (scale == 0.f)
+ {
+            // NOTE: If no quantization scale is set, we create an Int32Decoder, which simply
+ // casts the int value to float. This will be used for any INT32 data other than
+ // convolution biases.
+ return std::make_unique<Int32Decoder>(static_cast<const int32_t*>(data));
+ }
+
+ // NOTE: If we only have a single (non-zero) quantization scale and no quantization
+ // dimension is specified, we need to create a ScaledInt32Decoder. This will be used
+ // to decode per-tensor quantized convolution biases.
+ return std::make_unique<ScaledInt32Decoder>(static_cast<const int32_t*>(data), scale);
+ }
+}
+
+} // anonymous namespace
+
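+// Callers obtain per-axis decoding transparently through the existing
+// MakeDecoder entry point. A minimal usage sketch, assuming 'biasInfo' and
+// 'biasData' describe a per-axis quantized Signed32 bias tensor:
+//
+//     std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(biasInfo, biasData);
+//     float firstBias = decoder->Get();
+//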
template<typename T>
inline std::unique_ptr<Decoder<T>> MakeDecoder(const TensorInfo& info, const void* data = nullptr);
}
case DataType::Signed32:
{
- const float scale = info.GetQuantizationScale();
- if (scale == 0.f)
- {
- return std::make_unique<Int32Decoder>(static_cast<const int32_t*>(data));
- }
- // NOTE: ScaledInt32Decoder is used for quantized convolution biases
- return std::make_unique<ScaledInt32Decoder>(static_cast<const int32_t*>(data), scale);
+ return MakeSigned32Decoder(info, data);
}
default:
{