[OpenMP] Change the nextgen plugin kernel thread count scheme as old plugins'
author Ye Luo <yeluo@anl.gov>
Tue, 20 Dec 2022 00:26:38 +0000 (18:26 -0600)
committer Ye Luo <yeluo@anl.gov>
Tue, 20 Dec 2022 00:27:02 +0000 (18:27 -0600)
Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D140352

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

index eb03e3b..83b656a 100644 (file)
@@ -36,8 +36,6 @@ AsyncInfoWrapperTy::~AsyncInfoWrapperTy() {
 Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
                             DeviceImageTy &Image) {
   PreferredNumThreads = getDefaultNumThreads(GenericDevice);
-  if (isGenericMode())
-    PreferredNumThreads += GenericDevice.getWarpSize();
 
   MaxNumThreads = GenericDevice.getThreadLimit();
 
@@ -92,6 +90,9 @@ void *GenericKernelTy::prepareArgs(GenericDeviceTy &GenericDevice,
 
 uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                         uint32_t ThreadLimitClause) const {
+  if (ThreadLimitClause > 0 && isGenericMode())
+    ThreadLimitClause += GenericDevice.getWarpSize();
+
   return std::min(MaxNumThreads, (ThreadLimitClause > 0) ? ThreadLimitClause
                                                          : PreferredNumThreads);
 }