COMPMID-3349: Fixed performance regression on Mali-G71
author Gian Marco Iodice <gianmarco.iodice@arm.com>
Tue, 14 Apr 2020 11:09:43 +0000 (12:09 +0100)
committer Gian Marco Iodice <gianmarco.iodice@arm.com>
Tue, 14 Apr 2020 16:27:09 +0000 (16:27 +0000)
Change-Id: I2c9cc9ebd7fe6cc6431d25ac795046b0539db616
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3014
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp

index 94311fb3aa87d8a3e60951dc8bb7e006b39bd410..04906e317fbb84ab46d83192a917fe79c8d8a658 100644 (file)
@@ -44,6 +44,8 @@ public:
     CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override;
 
 private:
+    CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
+    CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
     CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
     CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
     CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
index 4542f53136f0812a5002c3108eae2ec132f9166d..d30eaa9edc8fe8a511b644631da000736c5a25e2 100644 (file)
@@ -46,8 +46,8 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelS
 
     using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionBifrost::*)(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
 
-    // Configurations for Bifrost architectures
-    static std::map<DataType, FunctionExecutorPtr> gemm_configs =
+    // Default configurations for Bifrost architectures
+    static std::map<DataType, FunctionExecutorPtr> gemm_default_configs =
     {
         { DataType::F32, &CLGEMMKernelSelectionBifrost::default_f32 },
         { DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 },
@@ -57,14 +57,51 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelS
         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
     };
 
+    // Mali-G71 configurations
+    static std::map<DataType, FunctionExecutorPtr> gemm_g71_configs =
+    {
+        { DataType::F32, &CLGEMMKernelSelectionBifrost::default_f32 },
+        { DataType::F16, &CLGEMMKernelSelectionBifrost::g71_f16 },
+        { DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
+    };
+
+    // Mali-G76 configurations
+    static std::map<DataType, FunctionExecutorPtr> gemm_g76_configs =
+    {
+        { DataType::F32, &CLGEMMKernelSelectionBifrost::g76_f32 },
+        { DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 },
+        { DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+        { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
+    };
+
     const DataType data_type = params.data_type;
 
-    if(gemm_configs.find(data_type) != gemm_configs.end())
+    switch(_target)
     {
-        return (this->*gemm_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+        case GPUTarget::G71:
+            if(gemm_g71_configs.find(data_type) != gemm_g71_configs.end())
+            {
+                return (this->*gemm_g71_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+            }
+            ARM_COMPUTE_ERROR("Not supported data type");
+        case GPUTarget::G76:
+            if(gemm_g76_configs.find(data_type) != gemm_g76_configs.end())
+            {
+                return (this->*gemm_g76_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+            }
+            ARM_COMPUTE_ERROR("Not supported data type");
+        default:
+            if(gemm_default_configs.find(data_type) != gemm_default_configs.end())
+            {
+                return (this->*gemm_default_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+            }
+            ARM_COMPUTE_ERROR("Not supported data type");
     }
-
-    ARM_COMPUTE_ERROR("Not supported data type");
 }
 
 CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
@@ -75,7 +112,7 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsig
     {
         if((m > 1) && (n < 16))
         {
-            gemm_type = CLGEMMKernelType::RESHAPED;
+            gemm_type = CLGEMMKernelType::RESHAPED_V1;
         }
         else if(m == 1)
         {
@@ -85,19 +122,48 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsig
         {
             if((k > 256) && (m > 4))
             {
-                gemm_type = CLGEMMKernelType::RESHAPED;
+                constexpr float alpha = 3.2f;
+                constexpr float fact0 = 1.51f;
+                constexpr float fact1 = 1.66f;
+                constexpr float ops   = 12.0f;
+                const float     scale = k > 1024 ? 1.07f : 1.0f;
+                gemm_type             = (alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops)) ? CLGEMMKernelType::RESHAPED_V1 : CLGEMMKernelType::NATIVE_V1;
             }
             else
             {
-                gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+                gemm_type = CLGEMMKernelType::NATIVE_V1;
             }
         }
+
+        const auto workload = static_cast<float>((m * n) / 20.0f);
+
+        gemm_type = ((workload > 1600.0f) && (gemm_type == CLGEMMKernelType::RESHAPED_V1)) ? CLGEMMKernelType::RESHAPED : gemm_type;
     }
 
     return gemm_type;
 }
 
 CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+{
+    ARM_COMPUTE_UNUSED(n, k);
+    if(is_rhs_constant)
+    {
+        if(m == 1)
+        {
+            return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+        }
+        else
+        {
+            return CLGEMMKernelType::RESHAPED;
+        }
+    }
+    else
+    {
+        return CLGEMMKernelType::NATIVE_V1;
+    }
+}
+
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
 {
     if(is_rhs_constant)
     {
@@ -123,13 +189,43 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f16(unsigned int m, unsig
     }
 }
 
-CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::g76_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+{
+    CLGEMMKernelType gemm_type = CLGEMMKernelType::NATIVE_V1;
+
+    if(is_rhs_constant)
+    {
+        if((m > 1) && (n < 16))
+        {
+            gemm_type = CLGEMMKernelType::RESHAPED;
+        }
+        else if(m == 1)
+        {
+            gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+        }
+        else
+        {
+            if((k > 256) && (m > 4))
+            {
+                gemm_type = CLGEMMKernelType::RESHAPED;
+            }
+            else
+            {
+                gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+            }
+        }
+    }
+
+    return gemm_type;
+}
+
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::g71_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
 {
     if(is_rhs_constant)
     {
         if(m == 1)
         {
-            if((n > k) && gpu_target_is_in(_target, GPUTarget::G71))
+            if(n > k)
             {
                 return CLGEMMKernelType::NATIVE_V1;
             }