From: James Conroy
Date: Mon, 8 Jun 2020 13:53:10 +0000 (+0100)
Subject: IVGCVSW-4860 Add tests to verify QLstm projection
X-Git-Tag: submit/tizen/20210421.062230~488
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b22a75e2aaec1175bbacba54e1a33a83f9749ce2;p=platform%2Fupstream%2Farmnn.git

IVGCVSW-4860 Add tests to verify QLstm projection

* Adds int16 output tensor to CpuRef impl to prevent
  overflow when accumulating output after projection.
* Adds two remaining tests to verify QLstm on CpuRef.

Signed-off-by: James Conroy
Change-Id: I93d7c64c4a9cc1012cb2bc052d598d4279fbd372
---
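Note: the overflow this patch fixes comes from the projection stage of the
CpuRef QLstm workload. Before this change, the projection bias assign and the
projection MatMul accumulated through the int8 output encoder, so the running
sum lived in the int8 output tensor between accumulation steps and saturated
there. With outputScale = 3.05176e-05 (roughly 1/32768), int8 can only
represent about +/-0.0039, while int16 at the same scale covers about +/-1.0.
A minimal standalone sketch of the difference; the QuantizeToInt8 and
QuantizeToInt16 helpers are illustrative stand-ins, not the Arm NN
Encoder/Decoder API:

    #include <algorithm>
    #include <cstdint>

    // Quantize a real value with saturation at the integer type's limits.
    int8_t QuantizeToInt8(float v, float scale)
    {
        return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, v / scale)));
    }

    int16_t QuantizeToInt16(float v, float scale)
    {
        return static_cast<int16_t>(std::min(32767.0f, std::max(-32768.0f, v / scale)));
    }

    int main()
    {
        const float outputScale = 3.05176e-05f; // ~1/32768, as in the new tests
        const float partialSum  = 0.25f;        // a plausible mid-MatMul running sum

        // int8 at this scale clamps at 127 * scale ~= 0.0039, so the running
        // sum is destroyed long before the projection finishes...
        int8_t narrow = QuantizeToInt8(partialSum, outputScale);   // saturates to 127

        // ...whereas int16 at the same scale holds values up to ~1.0.
        int16_t wide = QuantizeToInt16(partialSum, outputScale);   // 8192, exact
        return (narrow == 127 && wide == 8192) ? 0 : 1;
    }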
diff --git a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
index c7f902a..08ee440 100644
--- a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.cpp
@@ -1733,7 +1733,7 @@ LayerTestResult<uint8_t, 2> QuantizedLstmTestImpl(
     return ret;
 }
 
-// QLSTM
+// QLSTM: CIFG, LayerNorm
 LayerTestResult<int8_t, 2> QLstmTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
@@ -1969,6 +1969,528 @@ LayerTestResult<int8_t, 2> QLstmTestImpl(
     return ret;
 }
 
+// QLSTM: Projection, LayerNorm
+LayerTestResult<int8_t, 2> QLstmTestImpl1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<int8_t, 2>& input,
+    const boost::multi_array<int8_t, 2>& outputExpected)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int numBatches = 2;
+    unsigned int inputSize = 5;
+    unsigned int outputSize = 3;
+    unsigned int numUnits = 4;
+
+    bool cifgEnabled = false;
+    bool peepholeEnabled = false;
+    bool projectionEnabled = true;
+    bool layerNormEnabled = true;
+
+    // Scale/Offset quantization info
+    float inputScale = 0.0078125f;
+    int32_t inputOffset = 0;
+
+    int32_t hiddenStateZeroPoint = 0;
+    float hiddenStateScale = 0.007f;
+
+    // if (!projectionEnabled) outputScale == hiddenStateScale
+    float outputScale = 3.05176e-05f;
+    int32_t outputOffset = 0;
+
+    float cellStateScale = 3.05176e-05f;
+    int32_t cellStateOffset = 0;
+
+    float weightsScale = 0.00784314f;
+    int32_t weightsOffset = 0;
+
+    float layerNormScale = 3.05182e-05f;
+    int32_t layerNormOffset = 0;
+
+    float biasScale = layerNormScale / 1024;
+    int32_t biasOffset = 0;
+
+    float projectionWeightsScale = 0.00392157f;
+
+    float inputIntermediateScale = 0.007059f;
+    float forgetIntermediateScale = 0.007812f;
+    float cellIntermediateScale = inputIntermediateScale;
+    float outputIntermediateScale = forgetIntermediateScale;
+
+    float cellClip = 0.0f;
+    float projectionClip = 0.0f;
+
+    // Input/Output tensor info
+    armnn::TensorInfo inputInfo({numBatches , inputSize},
+                                armnn::DataType::QAsymmS8,
+                                inputScale,
+                                inputOffset);
+
+    armnn::TensorInfo cellStateInfo({numBatches , numUnits},
+                                    armnn::DataType::QSymmS16,
+                                    cellStateScale,
+                                    cellStateOffset);
+
+    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+                                      armnn::DataType::QAsymmS8,
+                                      outputScale,
+                                      outputOffset);
+
+    LayerTestResult<int8_t, 2> ret(outputStateInfo);
+
+    // Input tensors
+    std::vector<int8_t> inputVector;
+    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
+    auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);
+
+    std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
+    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
+
+    std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
+    auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);
+
+    // Output tensors
+    std::vector<int16_t> cellStateOutVector = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
+    auto cellStateOutTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
+
+    std::vector<int8_t> outputVector;
+    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
+    ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);
+
+    // Create tensor handles
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+        workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+        workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+        workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    armnn::QLstmQueueDescriptor data;
+    armnn::WorkloadInfo info;
+
+    // Add inputs and outputs to workload
+    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
+    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
+
+    AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
+    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
+    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
+
+    // Weights and bias tensor and quantization info
+    armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
+                                       armnn::DataType::QSymmS8,
+                                       weightsScale,
+                                       weightsOffset);
+
+    armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
+                                           armnn::DataType::QSymmS8,
+                                           weightsScale,
+                                           weightsOffset);
+
+    armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);
+
+    armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
+                                            armnn::DataType::QSymmS8,
+                                            projectionWeightsScale,
+                                            0);
+
+    // Weights and bias tensor data
+    auto inputToInputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {64, 77, 89, -102, -115, 13, 25, 38, -51, 64, -102, 89, -77, 64, -51, -64, -51, -38, -25, -13});
+    auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
+    auto inputToCellWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
+    auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});
+
+    auto recurrentToInputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {-25, -38, 51, 13, -64, 115, -25, -38, -89, 6, -25, -77});
+    auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
+    auto recurrentToCellWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
+    auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});
+
+    auto inputGateBias = MakeTensor<int32_t, 1>(biasInfo, {644245, 3221226, 4724464, 8160438});
+    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
+    auto cellBias = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
+    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});
+
+    auto inputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {3277, 6553, 9830, 16384});
+    auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
+    auto cellLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
+    auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});
+
+    auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
+        {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});
+
+    // ScopedCpuTensorHandles
+    armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle inputGateBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
+
+    armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(layerNormWeightsInfo);
+    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
+    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
+    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);
+
+    // Allocate and copy data
+    AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
+    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+
+    AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]);
+    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
+    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
+    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);
+
+    AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
+
+    // Setup queue descriptor
+    data.m_InputToInputWeights = &inputToInputWeightsTensor;
+    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+    data.m_InputToCellWeights = &inputToCellWeightsTensor;
+    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+
+    data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+
+    data.m_InputGateBias = &inputGateBiasTensor;
+    data.m_ForgetGateBias = &forgetGateBiasTensor;
+    data.m_CellBias = &cellBiasTensor;
+    data.m_OutputGateBias = &outputGateBiasTensor;
+
+    data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
+    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+    data.m_ProjectionWeights = &projectionWeightsTensor;
+
+    data.m_Parameters.m_CifgEnabled = cifgEnabled;
+    data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
+    data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
+    data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;
+
+    data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
+    data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
+    data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
+    data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;
+
+    data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
+    data.m_Parameters.m_HiddenStateScale = hiddenStateScale;
+
+    data.m_Parameters.m_CellClip = cellClip;
+    data.m_Parameters.m_ProjectionClip = projectionClip;
+
+    // Create workload and allocate tensor handles
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
+    inputHandle->Allocate();
+    outputStateInHandle->Allocate();
+    cellStateInHandle->Allocate();
+
+    outputStateOutHandle->Allocate();
+    cellStateOutHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+    return ret;
+}
+
+// QLSTM: Projection, CIFG, LayerNorm
+LayerTestResult<int8_t, 2> QLstmTestImpl2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const boost::multi_array<int8_t, 2>& input,
+    const boost::multi_array<int8_t, 2>& outputExpected)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int numBatches = 2;
+    unsigned int inputSize = 5;
+    unsigned int outputSize = 3;
+    unsigned int numUnits = 4;
+
+    bool cifgEnabled = true;
+    bool peepholeEnabled = false;
+    bool projectionEnabled = true;
+    bool layerNormEnabled = true;
+
+    // Scale/Offset quantization info
+    float inputScale = 0.0078125f;
+    int32_t inputOffset = 0;
+
+    int32_t hiddenStateZeroPoint = 0;
+    float hiddenStateScale = 0.007f;
+
+    // if (!projectionEnabled) outputScale == hiddenStateScale
+    float outputScale = 3.05176e-05f;
+    int32_t outputOffset = 0;
+
+    float cellStateScale = 3.05176e-05f;
+    int32_t cellStateOffset = 0;
+
+    float weightsScale = 0.00784314f;
+    int32_t weightsOffset = 0;
+
+    float layerNormScale = 3.05182e-05f;
+    int32_t layerNormOffset = 0;
+
+    float biasScale = layerNormScale / 1024;
+    int32_t biasOffset = 0;
+
+    float projectionWeightsScale = 0.00392157f;
+
+    float inputIntermediateScale = 0.007059f;
+    float forgetIntermediateScale = 0.007812f;
+    float cellIntermediateScale = inputIntermediateScale;
+    float outputIntermediateScale = forgetIntermediateScale;
+
+    float cellClip = 0.0f;
+    float projectionClip = 0.0f;
+
+    // Input/Output tensor info
+    armnn::TensorInfo inputInfo({numBatches , inputSize},
+                                armnn::DataType::QAsymmS8,
+                                inputScale,
+                                inputOffset);
+
+    armnn::TensorInfo cellStateInfo({numBatches , numUnits},
+                                    armnn::DataType::QSymmS16,
+                                    cellStateScale,
+                                    cellStateOffset);
+
+    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
+                                      armnn::DataType::QAsymmS8,
+                                      outputScale,
+                                      outputOffset);
+
+    LayerTestResult<int8_t, 2> ret(outputStateInfo);
+
+    // Input tensors
+    std::vector<int8_t> inputVector;
+    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
+    auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);
+
+    std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
+    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
+
+    std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
+    auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);
+
+    // Output tensors
+    std::vector<int16_t> cellStateOutVector = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
+    auto cellStateOutTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
+
+    std::vector<int8_t> outputVector;
+    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
+    ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);
+
+    // Create tensor handles
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
+        workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
+        workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
+        workloadFactory.CreateTensorHandle(cellStateInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputStateInfo);
+
+    armnn::QLstmQueueDescriptor data;
+    armnn::WorkloadInfo info;
+
+    // Add inputs and outputs to workload
+    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
+    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
+    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
+
+    AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
+    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
+    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
+
+    // Weights and bias tensor and quantization info
+    armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
+                                       armnn::DataType::QSymmS8,
+                                       weightsScale,
+                                       weightsOffset);
+
+    armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
+                                           armnn::DataType::QSymmS8,
+                                           weightsScale,
+                                           weightsOffset);
+
+    armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);
+
+    armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
+                                            armnn::DataType::QSymmS8,
+                                            projectionWeightsScale,
+                                            0);
+
+    // Weights and bias tensor data
+    auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
+    auto inputToCellWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
+    auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
+        {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});
+
+    auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
+    auto recurrentToCellWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
+    auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
+        {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});
+
+    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
+    auto cellBias = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
+    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});
+
+    auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
+    auto cellLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
+    auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});
+
+    auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
+        {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});
+
+    // ScopedCpuTensorHandles
+    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
+    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
+    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
+    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
+
+    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
+    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
+    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);
+
+    armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);
+
+    // Allocate and copy data
+    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
+    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
+
+    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
+    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
+    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
+
+    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
+    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
+    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);
+
+    AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
+
+    // Setup queue descriptor
+    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+    data.m_InputToCellWeights = &inputToCellWeightsTensor;
+    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+
+    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+
+    data.m_ForgetGateBias = &forgetGateBiasTensor;
+    data.m_CellBias = &cellBiasTensor;
+    data.m_OutputGateBias = &outputGateBiasTensor;
+
+    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
+    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
+    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;
+
+    data.m_ProjectionWeights = &projectionWeightsTensor;
+
+    data.m_Parameters.m_CifgEnabled = cifgEnabled;
+    data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
+    data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
+    data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;
+
+    data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
+    data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
+    data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
+    data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;
+
+    data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
+    data.m_Parameters.m_HiddenStateScale = hiddenStateScale;
+
+    data.m_Parameters.m_CellClip = cellClip;
+    data.m_Parameters.m_ProjectionClip = projectionClip;
+
+    // Create workload and allocate tensor handles
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
+    inputHandle->Allocate();
+    outputStateInHandle->Allocate();
+    cellStateInHandle->Allocate();
+
+    outputStateOutHandle->Allocate();
+    cellStateOutHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
+    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
+
+    return ret;
+}
+
 } // anonymous namespace
 
@@ -2360,3 +2882,33 @@ LayerTestResult<int8_t, 2> QLstmTest(
 
     return QLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
 }
+
+LayerTestResult<int8_t, 2> QLstmTest1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
+    boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
+        {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
+
+    armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
+    boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
+        {127, 127, -108, -67, 127, 127}));
+
+    return QLstmTestImpl1(workloadFactory, memoryManager, input, expectedOutput);
+}
+
+LayerTestResult<int8_t, 2> QLstmTest2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
+    boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
+        {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
+
+    armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
+    boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
+        {127, 127, 127, -128, 127, 127}));
+
+    return QLstmTestImpl2(workloadFactory, memoryManager, input, expectedOutput);
+}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
index f1180ae..6e29345 100644
--- a/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/LstmTestImpl.hpp
@@ -66,3 +66,11 @@ LayerTestResult<uint8_t, 2> QuantizedLstmTest(
 LayerTestResult<int8_t, 2> QLstmTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int8_t, 2> QLstmTest1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<int8_t, 2> QLstmTest2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
\ No newline at end of file
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index eb2d0c5..09096b4 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1283,6 +1283,8 @@ ARMNN_AUTO_TEST_CASE(LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16Constant,
 
 // QLstm
 ARMNN_AUTO_TEST_CASE(QLstm, QLstmTest)
+ARMNN_AUTO_TEST_CASE(QLstm1, QLstmTest1)
+ARMNN_AUTO_TEST_CASE(QLstm2, QLstmTest2)
 
 // Convert from BFloat16 to Float32
 ARMNN_AUTO_TEST_CASE(ConvertBf16ToFp32, ConvertBf16ToFp32Test)
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index 34d048b..e11ea55 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -146,6 +146,7 @@ void RefQLstmWorkload::Execute() const
     std::vector<int16_t> forgetGateData(stateTensorSize);
     std::vector<int16_t> outputGateData(stateTensorSize);
     std::vector<int8_t> hiddenStateData(stateTensorSize);
+    std::vector<int16_t> outputInt16Data(numBatches * outputSize);
 
     armnn::TensorInfo inputGateInfo(
             {numBatches , numUnits}, armnn::DataType::QSymmS16, m_Data.m_Parameters.m_InputIntermediateScale, 0);
@@ -159,6 +160,10 @@ void RefQLstmWorkload::Execute() const
                                       armnn::DataType::QAsymmS8,
                                       m_Data.m_Parameters.m_HiddenStateScale,
                                       m_Data.m_Parameters.m_HiddenStateZeroPoint);
+    armnn::TensorInfo outputInt16Info({numBatches , outputSize},
+                                      armnn::DataType::QSymmS16,
+                                      outputInfo.GetQuantizationScale(),
+                                      outputInfo.GetQuantizationOffset());
 
     // Decoders/Encoders for internal states
     std::unique_ptr<Decoder<float>> inputGateDecoder =
@@ -183,6 +188,12 @@ void RefQLstmWorkload::Execute() const
     std::unique_ptr<Encoder<float>> hiddenStateEncoder =
             MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
 
+    // Int16 used to accumulate output to prevent overflowing (after Projection MatMul)
+    std::unique_ptr<Decoder<float>> outputInt16Decoder =
+            MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
+    std::unique_ptr<Encoder<float>> outputInt16Encoder =
+            MakeEncoder<float>(outputInt16Info, outputInt16Data.data());
+
     // Create decoders for optional params if they are enabled
     if (!cifgEnabled)
     {
@@ -494,12 +505,13 @@ void RefQLstmWorkload::Execute() const
     {
         if (m_ProjectionBiasTensor)
         {
-            VectorBatchVectorAssign(*projectionBiasDecoder,
-                    outputSize, numBatches, *outputEncoder);
+            VectorBatchVectorAssign(*projectionBiasDecoder, outputSize, numBatches, *outputInt16Encoder);
         }
 
-        MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder,
-                outputSize, numUnits, *hiddenStateDecoder, numBatches, *outputEncoder);
+        MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder, outputSize, numUnits, *hiddenStateDecoder,
+                                            numBatches, *outputInt16Encoder);
+
+        CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
 
         if (m_Data.m_Parameters.m_ProjectionClip > 0.0)
         {
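For reference, the ordering after this change is: the projection bias (when
present) is assigned into the int16 buffer, the projection MatMul accumulates
into that same buffer via outputInt16Encoder, and CopyVector then narrows the
finished result into the real int8 output exactly once. A simplified sketch of
that accumulate-then-narrow pattern, with plain loops and integer buffers
standing in for Arm NN's Decoder/Encoder plumbing; ProjectThenNarrow is an
illustrative helper, not the workload's API, and it omits the bias and
projection clip for brevity:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // output[b][o] = saturate_int8( sum_u weights[o][u] * hidden[b][u] ),
    // with all buffers assumed to share one quantization scale for brevity.
    void ProjectThenNarrow(const std::vector<int16_t>& weights, // [outputSize * numUnits]
                           const std::vector<int16_t>& hidden,  // [numBatches * numUnits]
                           std::vector<int8_t>& output,         // [numBatches * outputSize]
                           size_t numBatches, size_t outputSize, size_t numUnits)
    {
        // Wide accumulator: plays the role of outputInt16Data above.
        std::vector<int16_t> wide(numBatches * outputSize, 0);

        for (size_t b = 0; b < numBatches; ++b)
        {
            for (size_t o = 0; o < outputSize; ++o)
            {
                int32_t acc = 0;
                for (size_t u = 0; u < numUnits; ++u)
                {
                    acc += static_cast<int32_t>(weights[o * numUnits + u]) *
                           hidden[b * numUnits + u];
                }
                // Keep the running sum in the wider type until the MatMul is done.
                wide[b * outputSize + o] =
                    static_cast<int16_t>(std::min(32767, std::max(-32768, acc)));
            }
        }

        // Narrow to int8 once, after the whole MatMul (CopyVector's role above).
        for (size_t i = 0; i < output.size(); ++i)
        {
            output[i] = static_cast<int8_t>(
                std::min<int16_t>(127, std::max<int16_t>(-128, wide[i])));
        }
    }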