From 38e05bd2836b1b65b440330a9c283038ba4192c3 Mon Sep 17 00:00:00 2001 From: Jan Eilers Date: Wed, 26 Jun 2019 13:10:09 +0100 Subject: [PATCH] IVGCVSW-3236 Extend Ref LSTM with layer normalization support * Add descriptor values * Update lstm queue descriptor validate function * Update lstm workload * Update isLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport * Update lstm layer * Add unit tests Signed-off-by: Jan Eilers Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18 Signed-off-by: Jan Eilers --- include/armnn/Descriptors.hpp | 3 + include/armnn/ILayerSupport.hpp | 6 +- include/armnn/LstmParams.hpp | 8 + src/armnn/layers/LstmLayer.cpp | 81 ++++- src/armnn/layers/LstmLayer.hpp | 13 + src/backends/backendsCommon/LayerSupportBase.cpp | 6 +- src/backends/backendsCommon/LayerSupportBase.hpp | 6 +- src/backends/backendsCommon/WorkloadData.cpp | 271 ++++++++++++++- src/backends/backendsCommon/WorkloadData.hpp | 8 + src/backends/backendsCommon/WorkloadFactory.cpp | 33 +- src/backends/backendsCommon/test/LayerTests.cpp | 166 +++++++++ src/backends/backendsCommon/test/LayerTests.hpp | 11 + src/backends/backendsCommon/test/LstmTestImpl.hpp | 386 ++++++++++++++++++++- .../backendsCommon/test/WorkloadDataValidation.cpp | 149 +++++++- src/backends/cl/ClLayerSupport.cpp | 6 +- src/backends/cl/ClLayerSupport.hpp | 6 +- src/backends/reference/RefLayerSupport.cpp | 10 +- src/backends/reference/RefLayerSupport.hpp | 6 +- src/backends/reference/backend.mk | 1 + src/backends/reference/test/RefLayerTests.cpp | 14 + src/backends/reference/workloads/CMakeLists.txt | 1 + src/backends/reference/workloads/LstmUtils.cpp | 307 ++++++++++++++++ src/backends/reference/workloads/LstmUtils.hpp | 204 ++--------- .../reference/workloads/RefLstmWorkload.cpp | 100 +++++- .../reference/workloads/RefLstmWorkload.hpp | 6 + 25 files changed, 1597 insertions(+), 211 deletions(-) create mode 100644 src/backends/reference/workloads/LstmUtils.cpp diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp index 85e8b56..9175239 100644 --- a/include/armnn/Descriptors.hpp +++ b/include/armnn/Descriptors.hpp @@ -589,6 +589,7 @@ struct LstmDescriptor , m_CifgEnabled(true) , m_PeepholeEnabled(false) , m_ProjectionEnabled(false) + , m_LayerNormEnabled(false) {} /// @brief The activation function to use. @@ -604,6 +605,8 @@ struct LstmDescriptor bool m_PeepholeEnabled; /// Enable/disable the projection layer. bool m_ProjectionEnabled; + /// Enable/disable layer normalization + bool m_LayerNormEnabled; }; /// A MeanDescriptor for the MeanLayer. 
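For context on what the new m_LayerNormEnabled flag turns on: the updated reference workload normalises each gate's pre-activation (W·x + R·h, plus any peephole term) to zero mean and unit variance per batch row, rescales it with that gate's layer-norm weights, and only then adds the gate bias, broadly following the TensorFlow Lite layer-norm LSTM cell. Below is a minimal standalone C++ sketch of that per-gate step; the function name and signature are illustrative assumptions rather than ArmNN API, and the 1e-8f epsilon matches the value the new unit tests pass to MeanStddevNormalization.

#include <cmath>
#include <cstddef>
#include <vector>

// Sketch only: gatePreAct' = lnWeights * normalise(gatePreAct) + bias, per batch row.
// preActivation is one batch row of W*x + R*h (size nCell); all names are illustrative.
std::vector<float> LayerNormGatePreActivation(const std::vector<float>& preActivation,
                                              const std::vector<float>& lnWeights,
                                              const std::vector<float>& bias,
                                              float epsilon = 1e-8f)
{
    const std::size_t n = preActivation.size();

    float mean = 0.0f;
    for (float v : preActivation) { mean += v; }
    mean /= static_cast<float>(n);

    float variance = 0.0f;
    for (float v : preActivation) { variance += (v - mean) * (v - mean); }
    variance /= static_cast<float>(n);

    const float invStd = 1.0f / std::sqrt(variance + epsilon);
    std::vector<float> result(n);
    for (std::size_t i = 0; i < n; ++i)
    {
        // Normalise, rescale with the layer-norm (gamma) weights, then add the gate bias;
        // the caller applies the gate activation (sigmoid or tanh) afterwards.
        result[i] = (preActivation[i] - mean) * invStd * lnWeights[i] + bias[i];
    }
    return result;
}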
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp index bf0ac90..635b9cc 100644 --- a/include/armnn/ILayerSupport.hpp +++ b/include/armnn/ILayerSupport.hpp @@ -170,7 +170,11 @@ public: const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional reasonIfUnsupported = EmptyOptional()) const = 0; + Optional reasonIfUnsupported = EmptyOptional(), + const TensorInfo* inputLayerNormWeights = nullptr, + const TensorInfo* forgetLayerNormWeights = nullptr, + const TensorInfo* cellLayerNormWeights = nullptr, + const TensorInfo* outputLayerNormWeights = nullptr) const = 0; virtual bool IsMaximumSupported(const TensorInfo& input0, const TensorInfo& input1, diff --git a/include/armnn/LstmParams.hpp b/include/armnn/LstmParams.hpp index c4f38f0..a7c57c7 100644 --- a/include/armnn/LstmParams.hpp +++ b/include/armnn/LstmParams.hpp @@ -29,6 +29,10 @@ struct LstmInputParams , m_OutputGateBias(nullptr) , m_ProjectionWeights(nullptr) , m_ProjectionBias(nullptr) + , m_InputLayerNormWeights(nullptr) + , m_ForgetLayerNormWeights(nullptr) + , m_CellLayerNormWeights(nullptr) + , m_OutputLayerNormWeights(nullptr) { } @@ -49,6 +53,10 @@ struct LstmInputParams const ConstTensor* m_OutputGateBias; const ConstTensor* m_ProjectionWeights; const ConstTensor* m_ProjectionBias; + const ConstTensor* m_InputLayerNormWeights; + const ConstTensor* m_ForgetLayerNormWeights; + const ConstTensor* m_CellLayerNormWeights; + const ConstTensor* m_OutputLayerNormWeights; }; } // namespace armnn diff --git a/src/armnn/layers/LstmLayer.cpp b/src/armnn/layers/LstmLayer.cpp index 2b99f28..4012839 100644 --- a/src/armnn/layers/LstmLayer.cpp +++ b/src/armnn/layers/LstmLayer.cpp @@ -55,6 +55,19 @@ std::unique_ptr LstmLayer::CreateWorkload(const Graph& graph, const I descriptor.m_CellToForgetWeights = m_PeepholeParameters.m_CellToForgetWeights.get(); descriptor.m_CellToOutputWeights = m_PeepholeParameters.m_CellToOutputWeights.get(); } + + // Layer normalisation parameters + if(m_Param.m_LayerNormEnabled) + { + if (!m_Param.m_CifgEnabled) + { + descriptor.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights.get(); + } + descriptor.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights.get(); + descriptor.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights.get(); + descriptor.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights.get(); + } + return factory.CreateLstm(descriptor, PrepInfoAndDesc(descriptor, graph)); } @@ -110,6 +123,18 @@ LstmLayer* LstmLayer::Clone(Graph& graph) const std::make_unique(*m_PeepholeParameters.m_CellToOutputWeights) : nullptr; } + if (m_Param.m_LayerNormEnabled) + { + layer->m_LayerNormParameters.m_InputLayerNormWeights = m_LayerNormParameters.m_InputLayerNormWeights ? + std::make_unique(*m_LayerNormParameters.m_InputLayerNormWeights) : nullptr; + layer->m_LayerNormParameters.m_ForgetLayerNormWeights = m_LayerNormParameters.m_ForgetLayerNormWeights ? + std::make_unique(*m_LayerNormParameters.m_ForgetLayerNormWeights) : nullptr; + layer->m_LayerNormParameters.m_CellLayerNormWeights = m_LayerNormParameters.m_CellLayerNormWeights ? + std::make_unique(*m_LayerNormParameters.m_CellLayerNormWeights) : nullptr; + layer->m_LayerNormParameters.m_OutputLayerNormWeights = m_LayerNormParameters.m_OutputLayerNormWeights ? 
+ std::make_unique(*m_LayerNormParameters.m_OutputLayerNormWeights) : nullptr; + } + return std::move(layer); } @@ -220,6 +245,21 @@ void LstmLayer::ValidateTensorShapesFromInputs() "LstmLayer: TensorShape set on OutputSlot[3] does not match the inferred shape.", GetOutputSlot(3).GetTensorInfo().GetShape(), inferredShapes[3]); + + if (m_Param.m_LayerNormEnabled) + { + if(!m_Param.m_CifgEnabled) + { + BOOST_ASSERT_MSG(m_LayerNormParameters.m_InputLayerNormWeights != nullptr, + "LstmLayer: m_LayerNormParameters.m_inputLayerNormWeights should not be null."); + } + BOOST_ASSERT_MSG(m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr, + "LstmLayer: m_LayerNormParameters.m_forgetLayerNormWeights should not be null."); + BOOST_ASSERT_MSG(m_LayerNormParameters.m_CellLayerNormWeights != nullptr, + "LstmLayer: m_LayerNormParameters.m_cellLayerNormWeights should not be null."); + BOOST_ASSERT_MSG(m_LayerNormParameters.m_OutputLayerNormWeights != nullptr, + "LstmLayer: m_LayerNormParameters.m_outputLayerNormWeights should not be null."); + } } Layer::ConstantTensors LstmLayer::GetConstantTensorsByRef() @@ -246,7 +286,13 @@ Layer::ConstantTensors LstmLayer::GetConstantTensorsByRef() // Peephole parameters m_PeepholeParameters.m_CellToForgetWeights, - m_PeepholeParameters.m_CellToOutputWeights}; + m_PeepholeParameters.m_CellToOutputWeights, + + // Layer normalisation parameters + m_LayerNormParameters.m_InputLayerNormWeights, + m_LayerNormParameters.m_ForgetLayerNormWeights, + m_LayerNormParameters.m_CellLayerNormWeights, + m_LayerNormParameters.m_OutputLayerNormWeights}; } void LstmLayer::Accept(ILayerVisitor& visitor) const @@ -392,6 +438,39 @@ void LstmLayer::Accept(ILayerVisitor& visitor) const projectionBiasTensor = projectionBiasTensorCopy; inputParams.m_ProjectionBias = &projectionBiasTensor; } + ConstTensor inputLayerNormTensor; + if (m_LayerNormParameters.m_InputLayerNormWeights != nullptr) + { + ConstTensor inputLayerNormTensorCopy(m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_InputLayerNormWeights->Map(true)); + inputLayerNormTensor = inputLayerNormTensorCopy; + inputParams.m_InputLayerNormWeights = &inputLayerNormTensor; + } + ConstTensor forgetLayerNormTensor; + if (m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr) + { + ConstTensor forgetLayerNormTensorCopy(m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_ForgetLayerNormWeights->Map(true)); + forgetLayerNormTensor = forgetLayerNormTensorCopy; + inputParams.m_ForgetLayerNormWeights = &forgetLayerNormTensor; + } + ConstTensor cellLayerNormTensor; + if (m_LayerNormParameters.m_CellLayerNormWeights != nullptr) + { + ConstTensor cellLayerNormTensorCopy(m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_CellLayerNormWeights->Map(true)); + cellLayerNormTensor = cellLayerNormTensorCopy; + inputParams.m_CellLayerNormWeights = &cellLayerNormTensor; + } + ConstTensor outputLayerNormTensor; + if (m_LayerNormParameters.m_OutputLayerNormWeights != nullptr) + { + ConstTensor outputLayerNormTensorCopy(m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_OutputLayerNormWeights->Map(true)); + outputLayerNormTensor = outputLayerNormTensorCopy; + inputParams.m_OutputLayerNormWeights = &outputLayerNormTensor; + } + visitor.VisitLstmLayer(this, GetParameters(), inputParams, GetName()); } diff --git a/src/armnn/layers/LstmLayer.hpp b/src/armnn/layers/LstmLayer.hpp index bfea5d8..584d8e2 
100644
--- a/src/armnn/layers/LstmLayer.hpp
+++ b/src/armnn/layers/LstmLayer.hpp
@@ -11,6 +11,18 @@ namespace armnn

 class ScopedCpuTensorHandle;

+struct LstmOptLayerNormParameters
+{
+    /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputLayerNormWeights;
+    /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+    std::unique_ptr<ScopedCpuTensorHandle> m_ForgetLayerNormWeights;
+    /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+    std::unique_ptr<ScopedCpuTensorHandle> m_CellLayerNormWeights;
+    /// A unique pointer to represent 1D weights tensor with dimensions [num_units].
+    std::unique_ptr<ScopedCpuTensorHandle> m_OutputLayerNormWeights;
+};
+
 struct LstmOptCifgParameters
 {
     /// A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
@@ -70,6 +82,7 @@ public:
     LstmOptCifgParameters m_CifgParameters;
     LstmOptProjectionParameters m_ProjectionParameters;
     LstmOptPeepholeParameters m_PeepholeParameters;
+    LstmOptLayerNormParameters m_LayerNormParameters;

     /// Makes a workload for the LSTM type.
     /// @param [in] graph The graph where this layer can be found.
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 6c25f87..4488e25 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -243,7 +243,11 @@ bool LayerSupportBase::IsLstmSupported(const TensorInfo& input,
                                        const TensorInfo* projectionBias,
                                        const TensorInfo* cellToForgetWeights,
                                        const TensorInfo* cellToOutputWeights,
-                                       Optional<std::string&> reasonIfUnsupported) const
+                                       Optional<std::string&> reasonIfUnsupported,
+                                       const TensorInfo* inputLayerNormWeights,
+                                       const TensorInfo* forgetLayerNormWeights,
+                                       const TensorInfo* cellLayerNormWeights,
+                                       const TensorInfo* outputLayerNormWeights) const
 {
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 7f63ccf..8abd975 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -157,7 +157,11 @@ public:
                          const TensorInfo* projectionBias,
                          const TensorInfo* cellToForgetWeights,
                          const TensorInfo* cellToOutputWeights,
-                         Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional(),
+                         const TensorInfo* inputLayerNormWeights = nullptr,
+                         const TensorInfo* forgetLayerNormWeights = nullptr,
+                         const TensorInfo* cellLayerNormWeights = nullptr,
+                         const TensorInfo* outputLayerNormWeights = nullptr) const override;

     bool IsMaximumSupported(const TensorInfo& input0,
                             const TensorInfo& input1,
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index e7915dd..3766f5f 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -114,6 +114,30 @@
 }

 //---------------------------------------------------------------
+void ValidateTensorNumElements(const TensorInfo& tensor,
+                               std::string const& descName,
+                               unsigned int numElements,
+                               std::string const& tensorName)
+{
+    if (tensor.GetNumElements() != numElements)
+    {
+        throw InvalidArgumentException(descName + ": Expected " + to_string(numElements) + " but got " +
+                                       to_string(tensor.GetNumElements()) + " elements for " +
+                                       tensorName + " tensor.");
+    }
+}
+
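A quick illustration of why the patch checks both the rank and the element count together (the combined helper ValidateTensorNumDimNumElem follows next): a [10] tensor has the right element count for a [2, 5] LSTM input but the wrong rank, so checking elements alone would not catch it. The stand-in types and function below are hypothetical plain-C++ mirrors of the checks, not ArmNN code.

#include <functional>
#include <numeric>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for armnn::TensorInfo, just enough for the two checks.
struct ShapeInfo
{
    std::vector<unsigned int> shape;
    unsigned int NumDimensions() const { return static_cast<unsigned int>(shape.size()); }
    unsigned int NumElements() const
    {
        return std::accumulate(shape.begin(), shape.end(), 1u, std::multiplies<unsigned int>());
    }
};

// Mirrors the rank check followed by the element-count check.
void CheckDimAndNumElem(const ShapeInfo& info, unsigned int numDim, unsigned int numElem)
{
    if (info.NumDimensions() != numDim) { throw std::invalid_argument("wrong number of dimensions"); }
    if (info.NumElements() != numElem)  { throw std::invalid_argument("wrong number of elements"); }
}

int main()
{
    CheckDimAndNumElem(ShapeInfo{{2, 5}}, 2, 10);    // passes: [nBatch=2, nInput=5]
    // CheckDimAndNumElem(ShapeInfo{{10}}, 2, 10);   // would throw: right count, wrong rank
    return 0;
}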
+//---------------------------------------------------------------
+void ValidateTensorNumDimNumElem(const TensorInfo& tensorInfo,
+                                 unsigned int numDimension,
+                                 unsigned int numElements,
+                                 std::string const& tensorName)
+{
+    ValidateTensorNumDimensions(tensorInfo, "ValidateTensorNumDimNumElem: NumDimensionCheck",
+                                numDimension, tensorName);
+    ValidateTensorNumElements(tensorInfo, "ValidateTensorNumDimNumElem: NumElementsCheck",
+                              numElements, tensorName);
+}
+
+//---------------------------------------------------------------
 void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType,
     const std::string& descName, std::string const& tensorName)
 {
@@ -1238,22 +1262,257 @@ void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const

 void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 {
-    ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "LstmQueueDescriptor", 2, "input");
-    ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "LstmQueueDescriptor", 2, "output");
-
     std::vector<DataType> supportedTypes =
     {
         DataType::Float16,
         DataType::Float32,
         DataType::QuantisedSymm16
     };

+    // ported from android/ml/nn/common/operations/LSTM.cpp CheckInputTensorDimensions()
+
+    // Check that one of the inputs has a supported type, then check that every other
+    // input and output matches it.
     ValidateDataTypes(workloadInfo.m_InputTensorInfos[0],
                       supportedTypes,
                       "LstmQueueDescriptor");
+
+    // type matches all other inputs
+    for (uint32_t i = 1; i < workloadInfo.m_InputTensorInfos.size(); ++i)
+    {
+        ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0],
+                                     workloadInfo.m_InputTensorInfos[i],
+                                     "LstmQueueDescriptor",
+                                     "InputTensor[0]",
+                                     "InputTensor[" + std::to_string(i) + "]");
+    }
+    // type matches all other outputs
+    for (uint32_t i = 0; i < workloadInfo.m_OutputTensorInfos.size(); ++i)
+    {
+        ValidateTensorDataTypesMatch(workloadInfo.m_InputTensorInfos[0],
+                                     workloadInfo.m_OutputTensorInfos[i],
+                                     "LstmQueueDescriptor",
+                                     "InputTensor[0]",
+                                     "OutputTensor[" + std::to_string(i) + "]");
+    }

-    ValidateDataTypes(workloadInfo.m_OutputTensorInfos[0],
-                      supportedTypes,
-                      "LstmQueueDescriptor");
+    // TODO: check clipping parameter is valid
+
+    // Inferring batch size, number of outputs and number of cells from the inputs.
+    // TODO: figure out if there is a way to make sure the specific inputs are at that index of workloadInfo
+    const uint32_t n_input = workloadInfo.m_InputTensorInfos[0].GetShape()[1];
+    const uint32_t n_batch = workloadInfo.m_InputTensorInfos[0].GetShape()[0];
+    ValidatePointer(m_InputToOutputWeights, "Null pointer check", "InputToOutputWeights");
+    const uint32_t n_cell = m_InputToOutputWeights->GetShape()[0];
+    ValidatePointer(m_RecurrentToOutputWeights, "Null pointer check", "RecurrentToOutputWeights");
+    const uint32_t n_output = m_RecurrentToOutputWeights->GetShape()[1];
+
+    // check dimensions of all inputs and outputs
+    if (workloadInfo.m_InputTensorInfos.size() != 3)
+    {
+        throw InvalidArgumentException("Invalid number of inputs.");
+    }
+    if (workloadInfo.m_OutputTensorInfos.size() != 4)
+    {
+        throw InvalidArgumentException("Invalid number of outputs.");
+    }
+    // input tensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_InputTensorInfos[0], 2, (n_batch * n_input),
+                                "LstmQueueDescriptor input[0]");
+    // outputStateInTensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_InputTensorInfos[1], 2, (n_batch * n_output),
+                                "LstmQueueDescriptor input[1]");
+    // cellStateInTensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_InputTensorInfos[2], 2, (n_batch * n_cell),
+                                "LstmQueueDescriptor input[2]");
+    // scratchBufferTensor
+    unsigned int scratchBufferSize = m_Parameters.m_CifgEnabled ? n_cell * 3 : n_cell * 4;
+    ValidateTensorNumDimNumElem(workloadInfo.m_OutputTensorInfos[0], 2, (n_batch * scratchBufferSize),
+                                "LstmQueueDescriptor output[0]");
+    // outputStateOutTensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_OutputTensorInfos[1], 2, (n_batch * n_output),
+                                "LstmQueueDescriptor output[1]");
+    // cellStateOutTensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_OutputTensorInfos[2], 2, (n_batch * n_cell),
+                                "LstmQueueDescriptor output[2]");
+    // outputTensor
+    ValidateTensorNumDimNumElem(workloadInfo.m_OutputTensorInfos[3], 2, (n_batch * n_output),
+                                "LstmQueueDescriptor output[3]");
+
+
+    // check that dimensions of inputs/outputs and QueueDescriptor data match each other
+    if (m_InputToInputWeights)
+    {
+        ValidateTensorNumDimNumElem(m_InputToInputWeights->GetTensorInfo(), 2,
+                                    (n_cell * n_input), "InputToInputWeights");
+    }
+
+    ValidatePointer(m_InputToForgetWeights, "Null pointer check", "InputToForgetWeights");
+    ValidateTensorNumDimNumElem(m_InputToForgetWeights->GetTensorInfo(), 2,
+                                (n_cell * n_input), "InputToForgetWeights");
+
+    ValidatePointer(m_InputToCellWeights, "Null pointer check", "InputToCellWeights");
+    ValidateTensorNumDimNumElem(m_InputToCellWeights->GetTensorInfo(), 2,
+                                (n_cell * n_input), "InputToCellWeights");
+
+    if (m_RecurrentToInputWeights)
+    {
+        ValidateTensorNumDimNumElem(m_RecurrentToInputWeights->GetTensorInfo(), 2,
+                                    (n_cell * n_output), "RecurrentToInputWeights");
+    }
+
+    ValidatePointer(m_RecurrentToForgetWeights, "Null pointer check", "RecurrentToForgetWeights");
+    ValidateTensorNumDimNumElem(m_RecurrentToForgetWeights->GetTensorInfo(), 2,
+                                (n_cell * n_output), "RecurrentToForgetWeights");
+
+    ValidatePointer(m_RecurrentToCellWeights, "Null pointer check", "RecurrentToCellWeights");
+    ValidateTensorNumDimNumElem(m_RecurrentToCellWeights->GetTensorInfo(), 2,
+                                (n_cell * n_output), "RecurrentToCellWeights");
+
+    // Make sure the input-gate's parameters are either both present (regular
+    // LSTM) or both absent (CIFG-LSTM), and that m_CifgEnabled is set accordingly.
+    bool cifg_weights_all_or_none = ((m_InputToInputWeights && m_RecurrentToInputWeights &&
+                                      !m_Parameters.m_CifgEnabled) ||
+                                     (!m_InputToInputWeights && !m_RecurrentToInputWeights &&
+                                      m_Parameters.m_CifgEnabled));
+    if (!cifg_weights_all_or_none)
+    {
+        throw InvalidArgumentException("Input-gate parameters InputToInputWeights and RecurrentToInputWeights must "
+                                       "either both be present (regular LSTM) or both be absent (CIFG-LSTM). In "
+                                       "addition, m_CifgEnabled must be set accordingly.");
+    }
+
+    if (m_CellToInputWeights)
+    {
+        ValidateTensorNumDimNumElem(m_CellToInputWeights->GetTensorInfo(), 1,
+                                    n_cell, "CellToInputWeights");
+    }
+    if (m_CellToForgetWeights)
+    {
+        ValidateTensorNumDimNumElem(m_CellToForgetWeights->GetTensorInfo(), 1,
+                                    n_cell, "CellToForgetWeights");
+    }
+    if (m_CellToOutputWeights)
+    {
+        ValidateTensorNumDimNumElem(m_CellToOutputWeights->GetTensorInfo(), 1,
+                                    n_cell, "CellToOutputWeights");
+    }
+
+    // Make sure the peephole weights are either all present or all absent, and that
+    // m_PeepholeEnabled is set accordingly.
+    bool peephole_weights_all_or_none =
+            (((m_CellToInputWeights || m_Parameters.m_CifgEnabled) && m_CellToForgetWeights
+              && m_CellToOutputWeights && m_Parameters.m_PeepholeEnabled)
+             || (!m_CellToInputWeights && !m_CellToForgetWeights
+                 && !m_CellToOutputWeights && !m_Parameters.m_PeepholeEnabled));
+    if (!peephole_weights_all_or_none)
+    {
+        throw InvalidArgumentException("Invalid combination of peephole parameters.");
+    }
+
+    // Make sure the input gate bias is present only when not a CIFG-LSTM.
+    if (m_Parameters.m_CifgEnabled)
+    {
+        if (m_InputGateBias)
+        {
+            throw InvalidArgumentException("InputGateBias is present and CIFG-LSTM is enabled.");
+        }
+    }
+    else
+    {
+        if (!m_InputGateBias)
+        {
+            throw InvalidArgumentException("If CIFG-LSTM is disabled InputGateBias must be present.");
+        }
+        ValidateTensorNumDimNumElem(m_InputGateBias->GetTensorInfo(), 1,
+                                    n_cell, "InputGateBias");
+    }
+
+    ValidatePointer(m_ForgetGateBias, "Null pointer check", "ForgetGateBias");
+    ValidateTensorNumDimNumElem(m_ForgetGateBias->GetTensorInfo(), 1, n_cell, "ForgetGateBias");
+
+    ValidatePointer(m_CellBias, "Null pointer check", "CellBias");
+    ValidateTensorNumDimNumElem(m_CellBias->GetTensorInfo(), 1, n_cell, "CellBias");
+
+    ValidatePointer(m_OutputGateBias, "Null pointer check", "OutputGateBias");
+    ValidateTensorNumDimNumElem(m_OutputGateBias->GetTensorInfo(), 1, n_cell, "OutputGateBias");
+
+    if (m_ProjectionWeights)
+    {
+        ValidateTensorNumDimNumElem(m_ProjectionWeights->GetTensorInfo(), 2,
+                                    (n_cell * n_output), "ProjectionWeights");
+    }
+    if (m_ProjectionBias)
+    {
+        ValidateTensorNumDimNumElem(m_ProjectionBias->GetTensorInfo(), 1, n_output, "ProjectionBias");
+    }
+
+    // Make sure the projection tensors are consistent:
+    // 1) If projection weight is not present, then projection bias should not be
+    //    present.
+    // 2) If projection weight is present, then projection bias is optional.
+    bool projection_tensors_consistent = ((!m_ProjectionWeights && !m_ProjectionBias &&
+                                           !m_Parameters.m_ProjectionEnabled)
+                                          || (m_ProjectionWeights && !m_ProjectionBias &&
+                                              m_Parameters.m_ProjectionEnabled)
+                                          || (m_ProjectionWeights && m_ProjectionBias &&
+                                              m_Parameters.m_ProjectionEnabled));
+    if (!projection_tensors_consistent)
+    {
+        throw InvalidArgumentException("Projection tensors are inconsistent.");
+    }
+
+    // The four layer normalization weights either all have values or none of them have values.
+    // Additionally, if CIFG is used, the input layer normalization weights tensor is omitted and the
+    // remaining layer normalization weights either all have values or none of them have values.
+    // Layer normalization is used only when all the required layer normalization weights are present.
+    if (m_InputLayerNormWeights)
+    {
+        ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(), 1, n_cell, "InputLayerNormWeights");
+    }
+    if (m_ForgetLayerNormWeights)
+    {
+        ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights");
+    }
+    if (m_CellLayerNormWeights)
+    {
+        ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights");
+    }
+    if (m_OutputLayerNormWeights)
+    {
+        ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights");
+    }
+
+    if (m_Parameters.m_LayerNormEnabled)
+    {
+        if (!m_Parameters.m_CifgEnabled)
+        {
+            if (!m_InputLayerNormWeights)
+            {
+                throw InvalidArgumentException("Layer normalisation is enabled and CIFG-LSTM is disabled but "
+                                               "InputLayerNormWeights are not present.");
+            }
+            ValidateTensorNumDimNumElem(m_InputLayerNormWeights->GetTensorInfo(),
+                                        1, n_cell, "InputLayerNormWeights");
+        }
+        else if (m_InputLayerNormWeights)
+        {
+            throw InvalidArgumentException("InputLayerNormWeights are present while CIFG is enabled.");
+        }
+
+        ValidatePointer(m_ForgetLayerNormWeights, "Null pointer check layer normalisation enabled",
+                        "ForgetLayerNormWeights");
+        ValidateTensorNumDimNumElem(m_ForgetLayerNormWeights->GetTensorInfo(), 1, n_cell, "ForgetLayerNormWeights");
+
+        ValidatePointer(m_OutputLayerNormWeights, "Null pointer check layer normalisation enabled",
+                        "OutputLayerNormWeights");
+        ValidateTensorNumDimNumElem(m_OutputLayerNormWeights->GetTensorInfo(), 1, n_cell, "OutputLayerNormWeights");
+
+        ValidatePointer(m_CellLayerNormWeights, "Null pointer check layer normalisation enabled",
+                        "CellLayerNormWeights");
+        ValidateTensorNumDimNumElem(m_CellLayerNormWeights->GetTensorInfo(), 1, n_cell, "CellLayerNormWeights");
+    }
+    else if (m_InputLayerNormWeights || m_ForgetLayerNormWeights || m_OutputLayerNormWeights || m_CellLayerNormWeights)
+    {
+        throw InvalidArgumentException("Layer normalisation is disabled but one or more layer normalisation weights "
+                                       "are present.");
+    }
 }

 void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index fa9e1cd..d241f7b 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -344,6 +344,10 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor>
         , m_OutputGateBias(nullptr)
         , m_ProjectionWeights(nullptr)
         , m_ProjectionBias(nullptr)
+        , m_InputLayerNormWeights(nullptr)
+        , m_ForgetLayerNormWeights(nullptr)
+        , m_CellLayerNormWeights(nullptr)
+        , m_OutputLayerNormWeights(nullptr)
     {
     }

@@ -364,6 +368,10 @@
     const ConstCpuTensorHandle* m_OutputGateBias;
     const ConstCpuTensorHandle* m_ProjectionWeights;
     const ConstCpuTensorHandle* m_ProjectionBias;
+    const ConstCpuTensorHandle* m_InputLayerNormWeights;
+    const ConstCpuTensorHandle* m_ForgetLayerNormWeights;
+    const ConstCpuTensorHandle* m_CellLayerNormWeights;
+    const ConstCpuTensorHandle* m_OutputLayerNormWeights;

     void Validate(const WorkloadInfo& workloadInfo) const;
 };
diff --git
a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index b74b6af..8ef5985 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -396,6 +396,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, const TensorInfo* projectionBias = nullptr; const TensorInfo* cellToForgetWeights = nullptr; const TensorInfo* cellToOutputWeights = nullptr; + const TensorInfo* inputLayerNormWeights = nullptr; + const TensorInfo* forgetLayerNormWeights = nullptr; + const TensorInfo* cellLayerNormWeights = nullptr; + const TensorInfo* outputLayerNormWeights = nullptr; TensorInfo optInputToInputWeights; TensorInfo optRecurrentToInputWeights; @@ -405,6 +409,10 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, TensorInfo optProjectionBias; TensorInfo optCellToForgetWeights; TensorInfo optCellToOutputWeights; + TensorInfo optInputLayerNormWeights; + TensorInfo optForgetLayerNormWeights; + TensorInfo optCellLayerNormWeights; + TensorInfo optOutputLayerNormWeights; if(!descriptor.m_CifgEnabled) { @@ -449,6 +457,25 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, cellToOutputWeights = &optCellToOutputWeights; } + if(descriptor.m_LayerNormEnabled) + { + optInputLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), dataType); + inputLayerNormWeights = &optInputLayerNormWeights; + + optForgetLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), dataType); + forgetLayerNormWeights = &optForgetLayerNormWeights; + + optCellLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), dataType); + cellLayerNormWeights = &optCellLayerNormWeights; + + optOutputLayerNormWeights = OverrideDataType( + cLayer->m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), dataType); + outputLayerNormWeights = &optOutputLayerNormWeights; + } + result = layerSupportObject->IsLstmSupported( input, outputStateIn, @@ -475,7 +502,11 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, projectionBias, cellToForgetWeights, cellToOutputWeights, - reason); + reason, + inputLayerNormWeights, + forgetLayerNormWeights, + cellLayerNormWeights, + outputLayerNormWeights); break; } case LayerType::Maximum: diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp index ca39438..56c0ab6 100644 --- a/src/backends/backendsCommon/test/LayerTests.cpp +++ b/src/backends/backendsCommon/test/LayerTests.cpp @@ -1665,6 +1665,153 @@ LayerTestResult CopyViaSplitterInt16Test( return CopyViaSplitterTestImpl(workloadFactory, memoryManager, 1.0f, 0); } +void LstmUtilsZeroVectorTest() +{ + armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {2., 3., 3., 4.})); + + boost::multi_array expectedOutput = MakeTensor(inputDesc, std::vector( + {0., 0., 0., 0.})); + + return LstmUtilsZeroVectorTestImpl(input, 4, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationNoneZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + { 0.1f, 0.2f, 0.3f, 0.4f, //batch 0 + 0.9f, 1.0f, 1.1f, 1.2f })); //batch 1 + + boost::multi_array expectedOutput = 
MakeTensor(inputDesc, std::vector( + { -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f, //batch 0 + -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl(input, + vecSize, batchSize, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationAllZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1 + + boost::multi_array expectedOutput = MakeTensor(inputDesc, std::vector( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.0f, 0.0f, 0.0f, 0.0f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl(input, + vecSize, batchSize, expectedOutput); +} + +void LstmUtilsMeanStddevNormalizationMixedZeroInputTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 4; + armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + 0.1f, 0.2f, 0.3f, 0.4f })); //batch 1 + + boost::multi_array expectedOutput = MakeTensor(inputDesc, std::vector( + { 0.0f, 0.0f, 0.0f, 0.0f, //batch 0 + -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f })); //batch 1 + + return LstmUtilsMeanStddevNormalizationTestImpl(input, + vecSize, batchSize, expectedOutput); +} + + +void LstmUtilsVectorBatchVectorCwiseProductTest() +{ + uint32_t batchSize = 4; + uint32_t vecSize = 29; + armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32); + boost::multi_array vector = MakeTensor(vecDesc, std::vector( + { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f, + 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f, + 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f})); + + armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array batchVector = MakeTensor(batchVecDesc, std::vector( + { /* batch 0 */ + 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.1f, + 11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f, + 21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f, 0.0f, + /* batch 1 */ + -1.1f, -2.2f, -3.3f, -4.4f, -5.5f, -6.6f, -7.7f, -8.8f, -9.9f, -10.1f, + -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f, + -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f, 0.0f, + /* batch 2 */ + 1.1f, -2.2f, 3.3f, -4.4f, 5.5f, -6.6f, 7.7f, -8.8f, 9.9f, -10.1f, + 11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f, + 21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f, 0.0f, + /* batch 3 */ + -1.1f, 2.2f, -3.3f, 4.4f, -5.5f, 6.6f, -7.7f, 8.8f, -9.9f, 10.1f, + -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f, + -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f, 0.0f})); + + // Expect output = input * output + output. 
+ boost::multi_array expectedOutput = MakeTensor(batchVecDesc, std::vector( + { /* batch 0 */ + 1.210000f, 4.840000f, 10.889999f, 19.360001f, 30.250000f, 43.559998f, + 59.289997f, 77.440002f, 98.009995f, 102.010010f, 123.432091f, 146.894394f, + 172.396896f, 199.939606f, 229.522491f, 261.145599f, 294.808899f, 330.512421f, + 368.256134f, 408.040039f, 449.864075f, 493.728363f, 539.632874f, 587.577576f, + 637.562500f, 689.587585f, 743.652954f, 799.758423f, 0.000000f, + /* batch 1 */ + -1.210000f, -4.840000f, -10.889999f, -19.360001f, -30.250000f, -43.559998f, + -59.289997f, -77.440002f, -98.009995f, -102.010010f, -123.432091f, -146.894394f, + -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f, + -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f, + -637.562500f, -689.587585f, -743.652954f, -799.758423f, 0.000000f, + /* batch 2 */ + 1.210000f, -4.840000f, 10.889999f, -19.360001f, 30.250000f, -43.559998f, + 59.289997f, -77.440002f, 98.009995f, -102.010010f, 123.432091f, -146.894394f, + 172.396896f, -199.939606f, 229.522491f, -261.145599f, 294.808899f, -330.512421f, + 368.256134f, -408.040039f, 449.864075f, -493.728363f, 539.632874f, -587.577576f, + 637.562500f, -689.587585f, 743.652954f, -799.758423f, 0.000000f, + /* batch 3 */ + -1.210000f, 4.840000f, -10.889999f, 19.360001f, -30.250000f, 43.559998f, + -59.289997f, 77.440002f, -98.009995f, 102.010010f, -123.432091f, 146.894394f, + -172.396896f, 199.939606f, -229.522491f, 261.145599f, -294.808899f, 330.512421f, + -368.256134f, 408.040039f, -449.864075f, 493.728363f, -539.632874f, 587.577576f, + -637.562500f, 689.587585f, -743.652954f, 799.758423f, 0.000000f})); + + return LstmUtilsVectorBatchVectorCwiseProductTestImpl(vector, batchVector, + vecSize, batchSize, expectedOutput); +} + + +void LstmUtilsVectorBatchVectorAddTest() +{ + uint32_t batchSize = 2; + uint32_t vecSize = 3; + armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32); + boost::multi_array vector = MakeTensor(vecDesc, std::vector( + { 0.0f, -0.5f, 1.0f})); + + armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32); + boost::multi_array batchVector = MakeTensor(batchVecDesc, std::vector( + { 1.0f, 2.0f, 3.0f, //batch 0 + 4.0f, 5.0f, 6.0f})); //batch 1 + + boost::multi_array expectedOutput = MakeTensor(batchVecDesc, std::vector( + { 1.0f, 1.5f, 4.0f, + 4.0f, 4.5f, 7.0f})); + + return LstmUtilsVectorBatchVectorAddTestImpl(vector, batchVector, + vecSize, batchSize, expectedOutput); +} + + LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) @@ -1721,6 +1868,25 @@ LayerTestResult LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest( workloadFactory, memoryManager, input, expectedOutput); } + +LayerTestResult LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {0.7f, 0.8f, 0.1f, 0.2f, 0.3f, //batch 0 + 0.3f, 0.2f, 0.9f, 0.8f, 0.1f})); //batch 1 + + armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + { 0.0244077f, 0.128027f, -0.00170918f, //batch 0 + -0.00692428f, 0.0848741f, 0.063445f})); //batch 1 + return 
LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl( + workloadFactory, memoryManager, input, expectedOutput); +} + + LayerTestResult LstmLayerInt16NoCifgNoPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index 405ccff..66324e1 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -1458,6 +1458,13 @@ LayerTestResult PermuteFloat32ValueSet3Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +void LstmUtilsZeroVectorTest(); +void LstmUtilsMeanStddevNormalizationNoneZeroInputTest(); +void LstmUtilsMeanStddevNormalizationAllZeroInputTest(); +void LstmUtilsMeanStddevNormalizationMixedZeroInputTest(); +void LstmUtilsVectorBatchVectorCwiseProductTest(); +void LstmUtilsVectorBatchVectorAddTest(); + LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); @@ -1470,6 +1477,10 @@ LayerTestResult LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +LayerTestResult LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + LayerTestResult LstmLayerInt16NoCifgNoPeepholeNoProjectionTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); diff --git a/src/backends/backendsCommon/test/LstmTestImpl.hpp b/src/backends/backendsCommon/test/LstmTestImpl.hpp index dae9c8a..2ed0a97 100644 --- a/src/backends/backendsCommon/test/LstmTestImpl.hpp +++ b/src/backends/backendsCommon/test/LstmTestImpl.hpp @@ -16,6 +16,119 @@ #include #include +#include "reference/workloads/LstmUtils.hpp" + +//LstmUtils Tests +// TODO: Add tests for the remaining functions in LstmUtils.hpp + +template> +void LstmUtilsVectorBatchVectorAddTestImpl( + boost::multi_array& vec, + boost::multi_array& batchVec, + uint32_t vSize, + uint32_t nBatch, + boost::multi_array& expectedOutput ) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder + std::unique_ptr> vecDecoder = armnn::MakeDecoder(tensorInfo, vec.data()); + std::unique_ptr> batchVecDecoder = armnn::MakeDecoder(tensorInfo, batchVec.data()); + std::unique_ptr> batchVecEncoder = armnn::MakeEncoder(tensorInfo, batchVec.data()); + + VectorBatchVectorAdd(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder); + + // check shape and compare values + BOOST_TEST(CompareTensors(batchVec, expectedOutput)); + + // check if iterator is back at start position + batchVecEncoder->Set(1.0f); + BOOST_TEST(batchVec[0][0] == 1.0f); +} + +template> +void LstmUtilsZeroVectorTestImpl( + boost::multi_array& input, + uint32_t vSize, + boost::multi_array& expectedOutput) { + + float qScale = 0.0f; + int32_t qOffset = 0; + + armnn::TensorInfo tensorInfo({vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder for input + std::unique_ptr> outputEncoder = armnn::MakeEncoder(tensorInfo, input.data()); + + // call ZeroVector + 
ZeroVector(*outputEncoder, vSize); + + // check shape and compare values + BOOST_TEST(CompareTensors(input, expectedOutput)); + + // check if iterator is back at start position + outputEncoder->Set(1.0f); + BOOST_TEST(input[0] == 1.0f); + +} + + +template> +void LstmUtilsMeanStddevNormalizationTestImpl( + boost::multi_array& input, + uint32_t vSize, + uint32_t nBatch, + boost::multi_array& expectedOutput) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder for input + std::unique_ptr> inputDecoder = armnn::MakeDecoder(tensorInfo, input.data()); + std::unique_ptr> outputEncoder = armnn::MakeEncoder(tensorInfo, input.data()); + + MeanStddevNormalization(*inputDecoder, *outputEncoder, vSize, nBatch, 1e-8f); + + // check shape and compare values + BOOST_TEST(CompareTensors(input, expectedOutput)); + + // check if iterator is back at start position + outputEncoder->Set(1.0f); + BOOST_TEST(input[0][0] == 1.0f); +} + +template> +void LstmUtilsVectorBatchVectorCwiseProductTestImpl( + boost::multi_array& vec, + boost::multi_array& batchVec, + uint32_t vSize, + uint32_t nBatch, + boost::multi_array& expectedOutput) +{ + float qScale = 0.0f; + int32_t qOffset = 0; + armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType, qScale, qOffset ); + + // Make encoder and decoder + std::unique_ptr> vecDecoder = armnn::MakeDecoder(tensorInfo, vec.data()); + std::unique_ptr> batchVecDecoder = armnn::MakeDecoder(tensorInfo, batchVec.data()); + std::unique_ptr> batchVecEncoder = armnn::MakeEncoder(tensorInfo, batchVec.data()); + + VectorBatchVectorCwiseProduct(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder); + + // check shape and compare values + BOOST_TEST(CompareTensors(batchVec, expectedOutput)); + + // check if iterator is back at start position + batchVecEncoder->Set(1.0f); + BOOST_TEST(batchVec[0][0] == 1.0f); +} + +// Lstm Layer tests: + template> LayerTestResult LstmNoCifgNoPeepholeNoProjectionTestImpl( @@ -187,7 +300,6 @@ LstmNoCifgNoPeepholeNoProjectionTestImpl( data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; - data.m_CellToInputWeights = &cellToInputWeightsTensor; data.m_InputGateBias = &inputGateBiasTensor; data.m_ForgetGateBias = &forgetGateBiasTensor; data.m_CellBias = &cellBiasTensor; @@ -1157,3 +1269,275 @@ LayerTestResult LstmLayerWithCifgWithPeepholeNoProjectionTestImpl( return ret3; } + + +template> +LayerTestResult +LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array& input, + const boost::multi_array& outputExpected, + float qScale = 0.0f, + int32_t qOffset = 0, + armnn::DataType constantDataType = armnn::DataType::Float32) +{ + unsigned int batchSize = 2; + unsigned int outputSize = 3; + unsigned int inputSize = 5; + unsigned numUnits = 4; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset); + + // Scratch buffer size without CIFG [batchSize, numUnits * 4] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, 
qOffset); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset); + + LayerTestResult ret(outputTensorInfo); + + std::vector inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor(inputTensorInfo, inputVector); + + std::vector cellStateInVector(batchSize * numUnits, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector scratchBufferVector(batchSize * numUnits * 4, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + + std::vector cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + + std::vector outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo3({outputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4({numUnits}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, constantDataType, qScale, qOffset); + armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, constantDataType, qScale, qOffset); + + auto inputToInputWeights = + MakeTensor(tensorInfo4x5, { 0.5f, 0.6f, 0.7f, -0.8f, -0.9f, + 0.1f, 0.2f, 0.3f, -0.4f, 0.5f, + -0.8f, 0.7f, -0.6f, 0.5f, -0.4f, + -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}); //{numUnits, inputSize} + + auto inputToForgetWeights = + MakeTensor(tensorInfo4x5, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f, + -0.5f, -0.2f, -0.4f, 
0.3f, -0.8f, + -0.4f, 0.3f, -0.5f, -0.4f, -0.6f, + 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}); //{numUnits, inputSize} + + auto inputToCellWeights = + MakeTensor(tensorInfo4x5, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f, + 0.5f, -0.2f, -0.3f, -0.2f, -0.6f, + 0.6f, -0.1f, -0.4f, -0.3f, -0.7f, + 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}); //{numUnits, inputSize} + + auto inputToOutputWeights = + MakeTensor(tensorInfo4x5, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f, + -0.7f, 0.3f, -0.3f, -0.8f, -0.2f, + 0.6f, -0.2f, 0.4f, -0.7f, -0.3f, + -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}); //{numUnits, inputSize} + + auto inputGateBias = + MakeTensor(tensorInfo4, {0.03f, 0.15f, 0.22f, 0.38f}); //{numUnits} + + auto forgetGateBias = + MakeTensor(tensorInfo4, {0.1f, -0.3f, -0.2f, 0.1f}); //{numUnits} + + auto cellBias = + MakeTensor(tensorInfo4, {-0.05f, 0.72f, 0.25f, 0.08f}); //{numUnits} + + auto outputGateBias = + MakeTensor(tensorInfo4, {0.05f, -0.01f, 0.2f, 0.1f}); //{numUnits} + + auto recurrentToInputWeights = + MakeTensor(tensorInfo4x3, {-0.2f, -0.3f, 0.4f, + 0.1f, -0.5f, 0.9f, + -0.2f, -0.3f, -0.7f, + 0.05f, -0.2f, -0.6f}); //{numUnits, outputSize} + + auto recurrentToCellWeights = + MakeTensor(tensorInfo4x3, {-0.3f, 0.2f, 0.1f, + -0.3f, 0.8f, -0.08f, + -0.2f, 0.3f, 0.8f, + -0.6f, -0.1f, 0.2f}); //{numUnits, outputSize} + + auto recurrentToForgetWeights = + MakeTensor(tensorInfo4x3, {-0.5f, -0.3f, -0.5f, + -0.2f, 0.6f, 0.4f, + 0.9f, 0.3f, -0.1f, + 0.2f, 0.5f, 0.2f}); //{numUnits, outputSize} + + auto recurrentToOutputWeights = + MakeTensor(tensorInfo4x3, { 0.3f, -0.1f, 0.1f, + -0.2f, -0.5f, -0.7f, + -0.2f, -0.6f, -0.1f, + -0.4f, -0.7f, -0.2f}); //{numUnits, outputSize} + + auto cellToInputWeights = + MakeTensor(tensorInfo4, {0.05f, 0.1f, 0.25f, 0.15f}); //{numUnits} + + auto cellToForgetWeights = + MakeTensor(tensorInfo4, {-0.02f, -0.15f, -0.25f, -0.03f}); //{numUnits} + + auto cellToOutputWeights = + MakeTensor(tensorInfo4, {0.1f, -0.1f, -0.5f, 0.05f}); //{numUnits} + + auto projectionWeights = + MakeTensor(tensorInfo3x4, + {-0.1f, 0.2f, 0.01f, -0.2f, + 0.1f, 0.5f, 0.3f, 0.08f, + 0.07f, 0.2f, -0.4f, 0.2f}); //{outputSize, numUnits} + + std::vector projectionBiasVector(outputSize, 0.f); + auto projectionBias = MakeTensor(tensorInfo3, projectionBiasVector); //{outputSize} + + auto inputLayerNormWeights = + MakeTensor(tensorInfo4, {0.1f, 0.2f, 0.3f, 0.5f}); //{numUnits} + + auto forgetLayerNormWeights = + MakeTensor(tensorInfo4, {0.2f, 0.2f, 0.4f, 0.3f}); //{numUnits} + + auto cellLayerNormWeights = + MakeTensor(tensorInfo4, {0.7f, 0.2f, 0.3f, 0.8f}); //{numUnits} + + auto outputLayerNormWeights = + MakeTensor(tensorInfo4, {0.6f, 0.2f, 0.2f, 0.5f}); //{numUnits} + + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + 
armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3); + + armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]); + + AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]); + AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + 
data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor; + data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor; + data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; + data.m_Parameters.m_LayerNormEnabled = true; + + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; + +} \ No newline at end of file diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp index 7c7af2d..c696098 100644 --- a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp +++ b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp @@ -453,22 +453,139 @@ BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate) { - armnn::TensorInfo inputTensorInfo; - armnn::TensorInfo outputTensorInfo; - - unsigned int inputShape[] = { 1, 2 }; - unsigned int outputShape[] = { 1 }; - - inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32); - outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32); - - LstmQueueDescriptor invalidData; - WorkloadInfo invalidInfo; - - AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - - BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException); + armnn::DataType dataType = armnn::DataType::Float32; + + float qScale = 0.0f; + int32_t qOffset = 0; + + unsigned int batchSize = 2; + unsigned int outputSize = 3; + unsigned int inputSize = 5; + unsigned numUnits = 4; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, dataType, qScale, qOffset ); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, dataType, qScale, qOffset); + + // Scratch buffer size with CIFG [batchSize, numUnits * 4] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, dataType, qScale, qOffset); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + + armnn::TensorInfo tensorInfo3({outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4({numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, dataType, qScale, qOffset); + armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, dataType, qScale, qOffset); + armnn::TensorInfo 
tensorInfo3x4({outputSize, numUnits}, dataType, qScale, qOffset); + + LstmQueueDescriptor data; + WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, nullptr); + AddInputToWorkload(data, info, outputStateInTensorInfo, nullptr); + AddInputToWorkload(data, info, cellStateInTensorInfo, nullptr); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, nullptr); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, nullptr); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, nullptr); + // The fourth output, AddOutputToWorkload(data, info, outputTensorInfo, nullptr), is deliberately + // left out here; it is added below, after the missing-output case has been checked. + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3); + armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor; + data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor; + data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; +
data.m_Parameters.m_LayerNormEnabled = true; + + // check wrong number of outputs + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + AddOutputToWorkload(data, info, outputTensorInfo, nullptr); + + // check wrong cifg parameter configuration + data.m_Parameters.m_CifgEnabled = true; + armnn::TensorInfo scratchBufferTensorInfo2({batchSize, numUnits * 3}, dataType, qScale, qOffset); + SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo2, nullptr); + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_CifgEnabled = false; + SetWorkloadOutput(data, info, 0, scratchBufferTensorInfo, nullptr); + + // check wrong inputGateBias configuration + data.m_InputGateBias = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_InputGateBias = &inputGateBiasTensor; + + // check inconsistent projection parameters + data.m_Parameters.m_ProjectionEnabled = false; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_ProjectionEnabled = true; + data.m_ProjectionWeights = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_ProjectionWeights = &projectionWeightsTensor; + + // check missing input layer normalisation weights + data.m_InputLayerNormWeights = nullptr; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor; + + // layer norm disabled but normalisation weights are present + data.m_Parameters.m_LayerNormEnabled = false; + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + data.m_Parameters.m_LayerNormEnabled = true; + + // check invalid outputTensor shape + armnn::TensorInfo incorrectOutputTensorInfo({batchSize, outputSize + 1}, dataType, qScale, qOffset); + SetWorkloadOutput(data, info, 3, incorrectOutputTensorInfo, nullptr); + BOOST_CHECK_THROW(data.Validate(info), armnn::InvalidArgumentException); + SetWorkloadOutput(data, info, 3, outputTensorInfo, nullptr); + + // check correct configuration + BOOST_CHECK_NO_THROW(data.Validate(info)); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index 12c2efe..ec134a1 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -420,7 +420,11 @@ bool ClLayerSupport::IsLstmSupported(const TensorInfo& input, const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported) const + Optional<std::string&> reasonIfUnsupported, + const TensorInfo* inputLayerNormWeights, + const TensorInfo* forgetLayerNormWeights, + const TensorInfo* cellLayerNormWeights, + const TensorInfo* outputLayerNormWeights) const { FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index f2f25af..4d0f5bd 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -131,7 +131,11 @@ public: const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + Optional<std::string&> reasonIfUnsupported = EmptyOptional(), + const TensorInfo* inputLayerNormWeights = nullptr, + const TensorInfo* forgetLayerNormWeights = nullptr, + const TensorInfo* cellLayerNormWeights = nullptr, + const TensorInfo*
outputLayerNormWeights = nullptr) const override; bool IsMaximumSupported(const TensorInfo& input0, const TensorInfo& input1, diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp index b563bad..3d260c5 100644 --- a/src/backends/reference/RefLayerSupport.cpp +++ b/src/backends/reference/RefLayerSupport.cpp @@ -861,7 +861,11 @@ bool RefLayerSupport::IsLstmSupported(const TensorInfo& input, const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported) const + Optional<std::string&> reasonIfUnsupported, + const TensorInfo* inputLayerNormWeights, + const TensorInfo* forgetLayerNormWeights, + const TensorInfo* cellLayerNormWeights, + const TensorInfo* outputLayerNormWeights) const { ignore_unused(descriptor); ignore_unused(inputToForgetWeights); @@ -881,6 +885,10 @@ bool RefLayerSupport::IsLstmSupported(const TensorInfo& input, ignore_unused(projectionBias); ignore_unused(cellToForgetWeights); ignore_unused(cellToOutputWeights); + ignore_unused(inputLayerNormWeights); + ignore_unused(forgetLayerNormWeights); + ignore_unused(cellLayerNormWeights); + ignore_unused(outputLayerNormWeights); bool supported = true; diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp index 22b007b..ead4d1c 100644 --- a/src/backends/reference/RefLayerSupport.hpp +++ b/src/backends/reference/RefLayerSupport.hpp @@ -155,7 +155,11 @@ public: const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, const TensorInfo* cellToOutputWeights, - Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + Optional<std::string&> reasonIfUnsupported = EmptyOptional(), + const TensorInfo* inputLayerNormWeights = nullptr, + const TensorInfo* forgetLayerNormWeights = nullptr, + const TensorInfo* cellLayerNormWeights = nullptr, + const TensorInfo* outputLayerNormWeights = nullptr) const override; bool IsMaximumSupported(const TensorInfo& input0, const TensorInfo& input1, diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk index 12e5774..a736a88 100644 --- a/src/backends/reference/backend.mk +++ b/src/backends/reference/backend.mk @@ -22,6 +22,7 @@ BACKEND_SOURCES := \ workloads/ElementwiseFunction.cpp \ workloads/FullyConnected.cpp \ workloads/Gather.cpp \ + workloads/LstmUtils.cpp \ workloads/Mean.cpp \ workloads/Concatenate.cpp \ workloads/Pad.cpp \ diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index 7797f17..9f89c8c 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -827,6 +827,17 @@ ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) // Lstm +BOOST_AUTO_TEST_CASE(LstmUtilsZeroVector) { + LstmUtilsZeroVectorTest(); } +BOOST_AUTO_TEST_CASE(LstmUtilsMeanStddevNormalization) { + LstmUtilsMeanStddevNormalizationNoneZeroInputTest(); + LstmUtilsMeanStddevNormalizationAllZeroInputTest(); + LstmUtilsMeanStddevNormalizationMixedZeroInputTest(); } +BOOST_AUTO_TEST_CASE(LstmUtilsVectorBatchVectorCwiseProduct) { + LstmUtilsVectorBatchVectorCwiseProductTest(); } +BOOST_AUTO_TEST_CASE(LstmUtilsVectorBatchVectorAdd) { + LstmUtilsVectorBatchVectorAddTest(); } + ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection, LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest)
ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, @@ -834,6 +845,9 @@ ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection, LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest) +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNorm, + LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest) + ARMNN_AUTO_TEST_CASE(LstmLayerInt16NoCifgNoPeepholeNoProjection, LstmLayerInt16NoCifgNoPeepholeNoProjectionTest) ARMNN_AUTO_TEST_CASE(LstmLayerInt16WithCifgWithPeepholeNoProjection, diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index 3c0af01..696605d 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -28,6 +28,7 @@ list(APPEND armnnRefBackendWorkloads_sources Gather.cpp Gather.hpp LstmUtils.hpp + LstmUtils.cpp Maximum.hpp Mean.cpp Mean.hpp diff --git a/src/backends/reference/workloads/LstmUtils.cpp b/src/backends/reference/workloads/LstmUtils.cpp new file mode 100644 index 0000000..f197aae --- /dev/null +++ b/src/backends/reference/workloads/LstmUtils.cpp @@ -0,0 +1,307 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LstmUtils.hpp" +#include "BaseIterator.hpp" +#include <backendsCommon/CpuTensorHandle.hpp> +#include <cmath> + + +// Helper functions ported from the Android code base +// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc + +void VectorBatchVectorAdd(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(batchVector.Get() + vector.Get()); + ++outResult; + ++vector; + ++batchVector; + } + vector -= vSize; + } + batchVector -= vSize * nBatch; + outResult -= vSize * nBatch; +} + + +// Layer norm for each batch. +// normalization_epsilon is added to avoid divergence.
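// As a worked illustration of the normalization implemented below (the values are
// editorial examples, not from the source): for one batch with v_size = 4 and
// input [1, 2, 3, 4]:
//   mean     = (1 + 2 + 3 + 4) / 4          = 2.5
//   variance = (1 + 4 + 9 + 16) / 4 - 2.5^2 = 1.25
//   output   = (input - mean) / sqrt(variance)
//            = [-1.342, -0.447, 0.447, 1.342]
// If the variance is exactly zero, 1/sqrt(normalization_epsilon) is used as the
// inverse standard deviation instead, which is what keeps the result finite.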
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector, + armnn::Encoder<float>& output_vector, + uint32_t v_size, + uint32_t n_batch, + float normalization_epsilon) +{ + for (uint32_t batch = 0; batch < n_batch; ++batch) { + float sum = 0.0f; + float sum_sq = 0.0f; + for (uint32_t i = 0; i < v_size; ++i) { + sum += input_vector.Get(); + sum_sq += input_vector.Get() * input_vector.Get(); + ++input_vector; + } + input_vector -= v_size; + + const float mean = sum / static_cast<float>(v_size); + float stddev_inv = 0.0f; + const float variance = sum_sq / static_cast<float>(v_size) - mean * mean; + if (variance == 0) { + stddev_inv = 1.0f / std::sqrt(normalization_epsilon); + } else { + stddev_inv = 1.0f / std::sqrt(variance); + } + + for (uint32_t i = 0; i < v_size; ++i) { + output_vector.Set((input_vector.Get() - mean) * stddev_inv); + ++output_vector; + ++input_vector; + } + // Don't reset the iterators here; they already point at the next batch. + } + output_vector -= v_size * n_batch; + input_vector -= v_size * n_batch; +} + +void ZeroVector(armnn::Encoder<float>& vector, + uint32_t vSize) +{ + for (uint32_t v = 0; v < vSize; v++) + { + vector.Set(0.0f); + ++vector; + } + vector -= vSize; +} + +void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix, + uint32_t mRows, + uint32_t mCols, + armnn::Decoder<float>& vector, + uint32_t nBatch, + armnn::Encoder<float>& outResult) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t r = 0; r < mRows; r++) + { + vector += b * mCols; + for (uint32_t c = 0; c < mCols; c++) + { + outResult.Set(outResult.Get() + matrix.Get() * vector.Get()); + ++matrix; + ++vector; + } + outResult += 1; + vector -= (b+1) * mCols; + } + matrix -= (mRows * mCols); + } + outResult -= (mRows * nBatch); +} + +void VectorBatchVectorAssign(armnn::Decoder<float>& vector, + uint32_t vSize, + uint32_t nBatch, + armnn::Encoder<float>& outBatchVector) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t v = 0; v < vSize; v++) + { + outBatchVector.Set(vector.Get()); + ++outBatchVector; + ++vector; + } + vector -= vSize; + } + outBatchVector -= (nBatch * vSize); +} + +void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(outResult.Get() + vector.Get() * batchVector.Get()); + ++outResult; + ++vector; + ++batchVector; + } + vector -= vSize; + } + batchVector -= vSize * nBatch; + outResult -= vSize * nBatch; +} + +void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(vector.Get() * batchVector.Get()); + ++outResult; + ++vector; + ++batchVector; + } + vector -= vSize; + } + batchVector -= vSize * nBatch; + outResult -= vSize * nBatch; +} + +void Sub1Vector(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Encoder<float>& result) +{ + for (uint32_t v = 0; v < vSize; v++) + { + result.Set(1.0f - vector.Get()); + ++vector; + ++result; + } + vector -= vSize; + result -= vSize; +} + +void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1, + armnn::Decoder<float>& vector2, + uint32_t vSize, + armnn::Encoder<float>& outResult) +{ + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(vector1.Get() * vector2.Get()); + ++outResult; + ++vector1; + ++vector2; + } + outResult -= vSize; + vector1 -= vSize; + vector2 -= vSize; +} + +void
VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1, + armnn::Decoder<float>& vector2, + uint32_t vSize, + armnn::Encoder<float>& outResult) +{ + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(outResult.Get() + vector1.Get() * vector2.Get()); + ++outResult; + ++vector1; + ++vector2; + } + outResult -= vSize; + vector1 -= vSize; + vector2 -= vSize; +} + +float Clip(float f, + float absLimit) +{ + float result = (absLimit < f) ? absLimit : f; + result = (-absLimit > result) ? -absLimit : result; + return result; +} + +void ClipVector(armnn::Decoder<float>& vector, + uint32_t vSize, + float absLimit, + armnn::Encoder<float>& outResult) +{ + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(Clip(vector.Get(), absLimit)); + ++vector; + ++outResult; + } + vector -= vSize; + outResult -= vSize; +} + +void CopyVector(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Encoder<float>& outResult) +{ + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(vector.Get()); + ++outResult; + ++vector; + } + outResult -= vSize; + vector -= vSize; +} + +void SetActivationParameters(uint32_t activation, + armnn::ActivationFunction& outArmnnActivation, + float& outA, + float& outB) +{ + switch (activation) + { + case 0: // None + outA = 0; + outB = 0; + return; + + case 1: // Relu + outArmnnActivation = armnn::ActivationFunction::ReLu; + outA = 0; + outB = 0; + return; + + case 3: // Relu6 + outArmnnActivation = armnn::ActivationFunction::BoundedReLu; + outA = 6; + outB = 0; + return; + + case 4: // Tanh + outArmnnActivation = armnn::ActivationFunction::TanH; + outA = 1; + outB = 1; + return; + + case 6: // Sigmoid + outArmnnActivation = armnn::ActivationFunction::Sigmoid; + outA = 0; + outB = 0; + return; + + default: + throw armnn::Exception("Unsupported activation function: " + std::to_string(activation)); + } +} + +std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr) +{ + if (!ptr) + { + return nullptr; + } + + return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr); +} diff --git a/src/backends/reference/workloads/LstmUtils.hpp b/src/backends/reference/workloads/LstmUtils.hpp index db02a84..f6aff8b 100644 --- a/src/backends/reference/workloads/LstmUtils.hpp +++ b/src/backends/reference/workloads/LstmUtils.hpp @@ -8,211 +8,81 @@ #include "BaseIterator.hpp" #include <backendsCommon/CpuTensorHandle.hpp> -namespace -{ - // Helper functions ported from the Android code base // Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc + +void VectorBatchVectorAdd(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult); + +// Layer norm for each batch. +// normalization_epsilon is added to avoid divergence.
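// (In this patch the reference workload passes m_LayerNormEpsilon, defined as 1e-8 in
// RefLstmWorkload.hpp below, as the normalization_epsilon argument.)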
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector, + armnn::Encoder<float>& output_vector, + uint32_t v_size, + uint32_t n_batch, + float normalization_epsilon); + +void ZeroVector(armnn::Encoder<float>& vector, + uint32_t vSize); + void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder<float>& vector, uint32_t nBatch, - armnn::Encoder<float>& outResult) -{ - for (uint32_t b = 0; b < nBatch; b++) - { - for (uint32_t r = 0; r < mRows; r++) - { - vector += b * mCols; - for (uint32_t c = 0; c < mCols; c++) - { - outResult.Set(outResult.Get() + matrix.Get() * vector.Get()); - ++matrix; - ++vector; - } - outResult += 1; - vector -= (b+1) * mCols; - } - matrix -= (mRows * mCols); - } - outResult -= (mRows * nBatch); -} + armnn::Encoder<float>& outResult); void VectorBatchVectorAssign(armnn::Decoder<float>& vector, uint32_t vSize, uint32_t nBatch, - armnn::Encoder<float>& outBatchVector) -{ - for (uint32_t b = 0; b < nBatch; b++) - { - for (uint32_t v = 0; v < vSize; v++) - { - outBatchVector.Set(vector.Get()); - ++outBatchVector; - ++vector; - } - vector -= vSize; - } - outBatchVector -= (nBatch * vSize); -} + armnn::Encoder<float>& outBatchVector); void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Decoder<float>& batchVector, uint32_t nBatch, - armnn::Encoder<float>& outResult) -{ - for (uint32_t b = 0; b < nBatch; b++) - { - for (uint32_t v = 0; v < vSize; v++) - { - outResult.Set(outResult.Get() + vector.Get() * batchVector.Get()); - ++outResult; - ++vector; - ++batchVector; - } - vector -= vSize; - } - batchVector -= vSize * nBatch; - outResult -= vSize * nBatch; -} + armnn::Encoder<float>& outResult); + +void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult); void Sub1Vector(armnn::Decoder<float>& vector, uint32_t vSize, - armnn::Encoder<float>& result) -{ - for (uint32_t v = 0; v < vSize; v++) - { - result.Set(1.0f - vector.Get()); - ++vector; - ++result; - } - vector -= vSize; - result -= vSize; -} + armnn::Encoder<float>& result); + void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1, armnn::Decoder<float>& vector2, uint32_t vSize, - armnn::Encoder<float>& outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - outResult.Set(vector1.Get() * vector2.Get()); - ++outResult; - ++vector1; - ++vector2; - } - outResult -= vSize; - vector1 -= vSize; - vector2 -= vSize; -} + armnn::Encoder<float>& outResult); void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1, armnn::Decoder<float>& vector2, uint32_t vSize, - armnn::Encoder<float>& outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - outResult.Set(outResult.Get() + vector1.Get() * vector2.Get()); - ++outResult; - ++vector1; - ++vector2; - } - outResult -= vSize; - vector1 -= vSize; - vector2 -= vSize; -} + armnn::Encoder<float>& outResult); float Clip(float f, - float absLimit) -{ - float result = (absLimit < f) ? absLimit : f; - result = (-absLimit > result) ?
-absLimit : result; - return result; -} + float absLimit); void ClipVector(armnn::Decoder<float>& vector, uint32_t vSize, float absLimit, - armnn::Encoder<float>& outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - outResult.Set(Clip(vector.Get(), absLimit)); - ++vector; - ++outResult; - } - vector -= vSize; - outResult -= vSize; -} + armnn::Encoder<float>& outResult); void CopyVector(armnn::Decoder<float>& vector, uint32_t vSize, - armnn::Encoder<float>& outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - outResult.Set(vector.Get()); - ++outResult; - ++vector; - } - outResult -= vSize; - vector -= vSize; -} + armnn::Encoder<float>& outResult); void SetActivationParameters(uint32_t activation, armnn::ActivationFunction& outArmnnActivation, float& outA, - float& outB) -{ - switch (activation) - { - case 0: // None - outA = 0; - outB = 0; - return; - - case 1: // Relu - outArmnnActivation = armnn::ActivationFunction::ReLu; - outA = 0; - outB = 0; - return; - - case 3: // Relu6 - outArmnnActivation = armnn::ActivationFunction::BoundedReLu; - outA = 6; - outB = 0; - return; - - case 4: // Tanh - outArmnnActivation = armnn::ActivationFunction::TanH; - outA = 1; - outB = 1; - return; - - case 6: // Sigmoid - outArmnnActivation = armnn::ActivationFunction::Sigmoid; - outA = 0; - outB = 0; - return; - - default: - throw armnn::Exception("Unsupported activation function: " + std::to_string(activation)); - } -} - -std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr) -{ - if (!ptr) - { - return nullptr; - } - - return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr); -} - -} // anonymous namespace + float& outB); + +std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr); diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp index f8ebc58..70b3443 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.cpp +++ b/src/backends/reference/workloads/RefLstmWorkload.cpp @@ -32,6 +32,10 @@ RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const Wo , m_OutputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_OutputGateBias)) , m_ProjectionWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionWeights)) , m_ProjectionBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionBias)) + , m_InputLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_InputLayerNormWeights)) + , m_ForgetLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_ForgetLayerNormWeights)) + , m_CellLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_CellLayerNormWeights)) + , m_OutputLayerNormWeights (AssignScopedCpuTensorHandle(descriptor.m_OutputLayerNormWeights)) {} void RefLstmWorkload::Execute() const @@ -62,8 +66,9 @@ void RefLstmWorkload::Execute() const const uint32_t nCell = m_InputToOutputWeightsTensor->GetShape()[0]; const uint32_t nOutput = m_RecurrentToOutputWeightsTensor->GetShape()[1]; - const bool useCifg = m_Data.m_Parameters.m_CifgEnabled; - const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled; + const bool useCifg = m_Data.m_Parameters.m_CifgEnabled; + const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled; + const bool useLayerNorm = m_Data.m_Parameters.m_LayerNormEnabled; // Index the scratch buffers pointers to the global scratch buffer.
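// (For orientation: the scratch buffer is output 0 of the workload and holds one
// [nBatch, nCell] region per gate scratch area, four regions when CIFG is disabled
// and three when it is enabled, matching the numUnits * 4 and numUnits * 3 scratch
// shapes used by the validation tests above.)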
std::unique_ptr<Encoder<float>> inputGateScratch = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map()); @@ -134,6 +139,26 @@ void RefLstmWorkload::Execute() const std::unique_ptr<Decoder<float>> projectionWeightsTensor; std::unique_ptr<Decoder<float>> projectionBiasTensor; + std::unique_ptr<Decoder<float>> inputLayerNormWeights; + std::unique_ptr<Decoder<float>> forgetLayerNormWeights; + std::unique_ptr<Decoder<float>> cellLayerNormWeights; + std::unique_ptr<Decoder<float>> outputLayerNormWeights; + + if (useLayerNorm) + { + if (!useCifg) + { + inputLayerNormWeights = MakeDecoder<float>( + m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetTensor<void>()); + } + forgetLayerNormWeights = MakeDecoder<float>( + m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetTensor<void>()); + cellLayerNormWeights = MakeDecoder<float>( + m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetTensor<void>()); + outputLayerNormWeights = MakeDecoder<float>( + m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetTensor<void>()); + } + if (!useCifg) { inputToInputWeightsTensor = MakeDecoder<float>( @@ -169,18 +194,32 @@ void RefLstmWorkload::Execute() const } } - // Initialize scratch buffers with bias. - if (!useCifg) + if (!useLayerNorm) { - VectorBatchVectorAssign(*inputGateBiasTensor, - nCell, nBatch, *inputGateScratch); + // Initialize scratch buffers with bias. + if (!useCifg) + { + VectorBatchVectorAssign(*inputGateBiasTensor, + nCell, nBatch, *inputGateScratch); + } + VectorBatchVectorAssign(*forgetGateBiasTensor, + nCell, nBatch, *forgetGateScratch); + VectorBatchVectorAssign(*cellBiasTensor, + nCell, nBatch, *cellScratch); + VectorBatchVectorAssign(*outputGateBiasTensor, + nCell, nBatch, *outputGateScratch); + } + else + { + // Initialize scratch buffers with zeroes. + if (!useCifg) + { + ZeroVector(*inputGateScratch, nCell * nBatch); + } + ZeroVector(*forgetGateScratch, nCell * nBatch); + ZeroVector(*cellScratch, nCell * nBatch); + ZeroVector(*outputGateScratch, nCell * nBatch); } - VectorBatchVectorAssign(*forgetGateBiasTensor, - nCell, nBatch, *forgetGateScratch); - VectorBatchVectorAssign(*cellBiasTensor, - nCell, nBatch, *cellScratch); - VectorBatchVectorAssign(*outputGateBiasTensor, - nCell, nBatch, *outputGateScratch); // For each batch and cell: compute input_weight * input.
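// With layer normalization enabled, each gate value g below is computed as:
//   scratch = W_input * x + W_recurrent * h_prev (+ optional peephole term)
//   scratch = LayerNorm(scratch) * layerNormWeights + gateBias
//             (MeanStddevNormalization, VectorBatchVectorCwiseProduct, VectorBatchVectorAdd)
//   g       = activation(scratch)
// This is why the scratch buffers are zero-initialised above rather than
// bias-initialised: the bias is only added after normalization.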
if (!useCifg) @@ -216,6 +255,15 @@ void RefLstmWorkload::Execute() const VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor, nCell, *cellStateIn, nBatch, *inputGateScratch); } + if (useLayerNorm) + { + MeanStddevNormalization(*inputGateScratchDecoder, + *inputGateScratch, nCell, nBatch, m_LayerNormEpsilon); + VectorBatchVectorCwiseProduct(*inputLayerNormWeights, + nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch); + VectorBatchVectorAdd(*inputGateBiasTensor, + nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch); + } Activation(*inputGateScratchDecoder, *inputGateScratch, TensorInfo({nCell, nBatch}, outputType), ActivationFunction::Sigmoid, 0, 0); @@ -227,11 +275,30 @@ void RefLstmWorkload::Execute() const VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell, *cellStateIn, nBatch, *forgetGateScratch); } + if (useLayerNorm) + { + MeanStddevNormalization(*forgetGateScratchDecoder, + *forgetGateScratch, nCell, nBatch, m_LayerNormEpsilon); + VectorBatchVectorCwiseProduct(*forgetLayerNormWeights, + nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch); + VectorBatchVectorAdd(*forgetGateBiasTensor, + nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch); + } Activation(*forgetGateScratchDecoder, *forgetGateScratch, TensorInfo({nCell, nBatch}, outputType), ActivationFunction::Sigmoid, 0, 0); // For each batch and cell: update the cell. + if (useLayerNorm) + { + MeanStddevNormalization(*cellScratchDecoder, + *cellScratch, nCell, nBatch, m_LayerNormEpsilon); + VectorBatchVectorCwiseProduct(*cellLayerNormWeights, + nCell, *cellScratchDecoder, nBatch, *cellScratch); + VectorBatchVectorAdd(*cellBiasTensor, + nCell, *cellScratchDecoder, nBatch, *cellScratch); + } + VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut); ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid; @@ -267,6 +334,15 @@ void RefLstmWorkload::Execute() const VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor, nCell, *cellStateOutDecoder, nBatch, *outputGateScratch); } + if (useLayerNorm) + { + MeanStddevNormalization(*outputGateScratchDecoder, + *outputGateScratch, nCell, nBatch, m_LayerNormEpsilon); + VectorBatchVectorCwiseProduct(*outputLayerNormWeights, + nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch); + VectorBatchVectorAdd(*outputGateBiasTensor, + nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch); + } Activation(*outputGateScratchDecoder, *outputGateScratch, TensorInfo({nCell, nBatch}, outputType), ActivationFunction::Sigmoid, 0, 0); diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp index 38e3fb9..ce5a775 100644 --- a/src/backends/reference/workloads/RefLstmWorkload.hpp +++ b/src/backends/reference/workloads/RefLstmWorkload.hpp @@ -38,6 +38,12 @@ private: std::unique_ptr<ScopedCpuTensorHandle> m_OutputGateBiasTensor; std::unique_ptr<ScopedCpuTensorHandle> m_ProjectionWeightsTensor; std::unique_ptr<ScopedCpuTensorHandle> m_ProjectionBiasTensor; + std::unique_ptr<ScopedCpuTensorHandle> m_InputLayerNormWeights; + std::unique_ptr<ScopedCpuTensorHandle> m_ForgetLayerNormWeights; + std::unique_ptr<ScopedCpuTensorHandle> m_CellLayerNormWeights; + std::unique_ptr<ScopedCpuTensorHandle> m_OutputLayerNormWeights; + + float m_LayerNormEpsilon = static_cast<float>(1e-8); }; } //namespace armnn -- 2.7.4
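A minimal sketch of how a caller might exercise the new option through the public Arm NN API once this patch is applied (illustrative only: the variable names, the network object, and the tensor contents are assumptions, not part of the patch; m_LayerNormEnabled and the four LstmInputParams fields come from the diffs above, and AddLstmLayer is the existing INetwork entry point for LSTM layers):

    armnn::LstmDescriptor desc;
    desc.m_ActivationFunc    = 4;    // TanH, as in the tests above
    desc.m_CifgEnabled       = false;
    desc.m_PeepholeEnabled   = true;
    desc.m_ProjectionEnabled = true;
    desc.m_LayerNormEnabled  = true; // new flag added by this patch

    armnn::LstmInputParams params;
    // ... populate the existing weight/bias ConstTensors as before ...
    // Each of the four new fields expects a 1-D ConstTensor of shape [numUnits]:
    params.m_InputLayerNormWeights  = &inputLayerNormWeights;  // omit when CIFG is enabled
    params.m_ForgetLayerNormWeights = &forgetLayerNormWeights;
    params.m_CellLayerNormWeights   = &cellLayerNormWeights;
    params.m_OutputLayerNormWeights = &outputLayerNormWeights;

    armnn::IConnectableLayer* lstm = network->AddLstmLayer(desc, params, "lstm");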