IVGCVSW-3608 Fix Neon depthwise convolution 5x5 failure
author Matthew Jackson <matthew.jackson@arm.com>
Fri, 2 Aug 2019 13:53:10 +0000 (14:53 +0100)
committer Matteo Martincigh <matteo.martincigh@arm.com>
Wed, 7 Aug 2019 10:10:36 +0000 (10:10 +0000)
* Fix an issue caused by layers with 5x5 filters and depth multipliers > 1

Signed-off-by: Matthew Jackson <matthew.jackson@arm.com>
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: I58435a1f0e3c7e69861dc130fad525a01e2a849d

src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp

index 400ae18..18085ed 100644 (file)
@@ -113,11 +113,23 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
 
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
 
-    // Check for optimisation opportunities.
-    const bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
-    const bool use5x5Optimisation = (weightInfo.GetShape()[2] == 5) && (weightInfo.GetShape()[3] == 5);
-
-    if (use3x3Optimisation||use5x5Optimisation)
+    const arm_compute::ITensorInfo* inputInfo  = input.info();
+    const arm_compute::ITensorInfo* kernelInfo = m_KernelTensor->info();
+    const arm_compute::ITensorInfo* biasInfo   = m_BiasTensor ? m_BiasTensor->info() : nullptr;
+    const arm_compute::ITensorInfo* outputInfo = output.info();
+
+    // Check for optimisation opportunities
+    arm_compute::Status optimizationStatus =
+        arm_compute::NEDepthwiseConvolutionLayerOptimized::validate(inputInfo,
+                                                                    kernelInfo,
+                                                                    biasInfo,
+                                                                    outputInfo,
+                                                                    padStrideInfo,
+                                                                    depthMultiplier,
+                                                                    arm_compute::ActivationLayerInfo(),
+                                                                    aclDilationInfo);
+
+    if (optimizationStatus.error_code() == arm_compute::ErrorCode::OK)
     {
         m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayerOptimized>();
         static_cast<arm_compute::NEDepthwiseConvolutionLayerOptimized*>(