Integrate MLGO into CLGEMM and CLGEMMLowpMatrixMultiplyCore: Part 4
author    SiCong Li <sicong.li@arm.com>
          Thu, 4 Feb 2021 13:12:19 +0000
committer SiCong Li <sicong.li@arm.com>
          Tue, 9 Feb 2021 13:45:09 +0000
Apply cl_gemm::auto_heuristics to CLGEMMLowpMatrixMultiplyCore for the
selection of the reshaped-only-RHS GEMM config and of the GEMM kernel type
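
In sketch form, the selection flow this patch introduces for the
reshaped-only-RHS config is: query the MLGO-trained heuristics first, keep
the answer only if the kernels validate with it, and otherwise fall back to
the default heuristics. The snippet below condenses the patch's own
auto_select_gemm_config_reshaped_only_rhs (logging elided; the helper name
pick_config is illustrative, not from the patch):

    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> pick_config(
        auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d,
        int depth_output_gemm3d, const ITensorInfo *a, const ITensorInfo *b,
        const ITensorInfo *output)
    {
        // Ask the MLGO heuristics first; the result converts to false when
        // MLGO has no valid answer for this query
        auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
        if(config && validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info,
                                                             a, b, output, query.m, query.n, query.k,
                                                             reinterpret_input_as_3d, depth_output_gemm3d))
        {
            // The MLGO config passed kernel validation: use it
            return { config.lhs_info, config.rhs_info };
        }
        // Otherwise fall back to the hand-written default heuristics
        config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
        return { config.lhs_info, config.rhs_info };
    }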

Resolves: COMPMID-3843, COMPMID-3844

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I351c76b052a1e52acec23a217bb111da8e40518e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4992
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp

index 4bf5bde61ef58726b8262ceba15ae18b3cb780c1..6c4d9ef54ac2b4b0de35781ba707f5f8272ef9b4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Log.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
@@ -44,6 +45,8 @@
 #include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
+#include "utils/TypePrinter.h"
 
 namespace arm_compute
 {
@@ -52,19 +55,61 @@ using namespace arm_compute::cl_gemm;
 
 namespace
 {
-inline bool is_gemm_reshaped(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run)
+inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output,
+                                                    unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d)
 {
-    std::unique_ptr<ICLGEMMKernelSelection> gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target());
-    ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_kernel.get());
+    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
+    TensorInfo tmp_b_info{};
+    // Validate reshape RHS kernel
+    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
+    if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
+    {
+        return false;
+    }
+    // Validate mm kernel
+    // NOTE: Ignore all other parameters (e.g. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info
+    // NOTE: This assumes:
+    //  1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments).
+    //  2. lhs and rhs info do not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window).
+    GEMMKernelInfo gemm_kernel_info;
+    gemm_kernel_info.m                       = m;
+    gemm_kernel_info.n                       = n;
+    gemm_kernel_info.k                       = k;
+    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
+    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
+    gemm_kernel_info.lhs_info                = lhs_info;
+    gemm_kernel_info.rhs_info                = rhs_info;
+    // Since we ignore the output stage, the output data type has to be S32 to pass the validation
+    TensorInfo output_info_copy(*output);
+    output_info_copy.set_data_type(DataType::S32);
+    if(!bool(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info)))
+    {
+        return false;
+    }
+    return true;
+}
 
-    CLGEMMKernelSelectionParams params;
-    params.m               = m;
-    params.n               = n;
-    params.k               = k;
-    params.is_rhs_constant = reshape_b_only_on_first_run;
-    params.data_type       = data_type;
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d,
+                                                                                          const ITensorInfo *a,
+                                                                                          const ITensorInfo *b, const ITensorInfo *output)
+{
+    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
+    if(config)
+    {
+        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d))
+        {
+            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+            return { config.lhs_info, config.rhs_info };
+        }
+    }
+    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
+    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+    return { config.lhs_info, config.rhs_info };
+}
 
-    switch(gemm_kernel->select_kernel(params))
+inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
+{
+    switch(kernel_type)
     {
         case CLGEMMKernelType::NATIVE:
             return false;
@@ -151,7 +196,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
     const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
 
     // Check if we need to reshape the matrix A and matrix B
-    _is_gemm_reshaped = is_gemm_reshaped(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run);
+    _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run));
 
     if(_convert_to_qasymm8)
     {
@@ -173,8 +218,10 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
         }
 
         // Pick up the GEMM configuration
-        // Datatype is DataType::QASYMM8 or DataType::QASYMM8_SIGNED doesn't matter, since it only affect the shape configuration
-        std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
+        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
+        std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d,
+                                                                                 depth_output_gemm3d,
+                                                                                 a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());
 
         // Configure reshape RHS kernel
         _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
@@ -344,7 +391,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
     const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
     const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
 
-    bool reshape_matrix_b = is_gemm_reshaped(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run());
+    bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run()));
 
     const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
 
@@ -363,7 +410,11 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
         matrix_b_info = &tmp_b_info;
 
         // Pick up the GEMM configuration
-        std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
+        // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
+        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
+        const auto res = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
+        lhs_info       = res.lhs_info;
+        rhs_info       = res.rhs_info;
 
         // Validate reshape RHS kernel
         auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
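
At the two call sites changed above, the reshape decision now goes through
the selected kernel type rather than a direct heuristic query. A hypothetical
caller-side sketch follows; auto_select_gemm_kernel is assumed to apply the
same MLGO-then-default fallback as the config selection, and only the
NATIVE -> false mapping of is_gemm_reshaped is visible in the hunk above:

    const auto query = auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size };
    // Select the kernel type (MLGO first, default heuristics as fallback)
    const CLGEMMKernelType kernel_type = auto_select_gemm_kernel(query, reshape_b_only_on_first_run);
    // Map the kernel type to the flag stored in _is_gemm_reshaped (configure)
    // or reshape_matrix_b (validate); NATIVE selects the unreshaped path
    const bool reshape_matrix_b = is_gemm_reshaped(kernel_type);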