OpenCL: core support for FP16, more channel orders

author Joe Howse <josephhowse@nummist.com>

Mon, 21 Jun 2021 03:46:32 +0000 (00:46 -0300)

committer Joe Howse <josephhowse@nummist.com>

Wed, 30 Jun 2021 17:14:37 +0000 (14:14 -0300)
author Joe Howse <josephhowse@nummist.com>
Mon, 21 Jun 2021 03:46:32 +0000 (00:46 -0300)
committer Joe Howse <josephhowse@nummist.com>
Wed, 30 Jun 2021 17:14:37 +0000 (14:14 -0300)
diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp

index 5e5c846ad0598c9be7c511b97a0fe227336c5570..3ead76e5c46ea21da85a97a9c952a95729e9117a 100644 (file)
--- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp
+++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp
@@ -144,6 +144,10 @@ static void dumpOpenCLInformation()
          DUMP_MESSAGE_STDOUT("    Double support = " << doubleSupportStr);
          DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);
  
+        const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Half support = " << halfSupportStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0);
+
          const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
          DUMP_MESSAGE_STDOUT("    Host unified memory = " << isUnifiedMemoryStr);
          DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
@@ -191,6 +195,9 @@ static void dumpOpenCLInformation()
  
          DUMP_MESSAGE_STDOUT("    Preferred vector width double = " << device.preferredVectorWidthDouble());
          DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width half = " << device.preferredVectorWidthHalf());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf());
      }
      catch (...)
      {
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp

index 0e97cf52feb30cad363c1aee57b38b879a91bdd4..46185446f7263fce67d876b8601ad25bb5dee50d 100644 (file)
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -1566,6 +1566,7 @@ struct Device::Impl
          version_ = getStrProp(CL_DEVICE_VERSION);
          extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
          doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
+        halfFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG);
          hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
          maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
          maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
@@ -1678,6 +1679,7 @@ struct Device::Impl
      String version_;
      std::string extensions_;
      int doubleFPConfig_;
+    int halfFPConfig_;
      bool hostUnifiedMemory_;
      int maxComputeUnits_;
      size_t maxWorkGroupSize_;
@@ -1827,11 +1829,7 @@ int Device::singleFPConfig() const
  { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
  
  int Device::halfFPConfig() const
-#ifdef CL_VERSION_1_2
-{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
-#else
-{ CV_REQUIRE_OPENCL_1_2_ERROR; }
-#endif
+{ return p ? p->halfFPConfig_ : 0; }
  
  bool Device::endianLittle() const
  { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
@@ -6668,6 +6666,10 @@ void convertFromImage(void* cl_mem_image, UMat& dst)
          depth = CV_32F;
          break;
  
+    case CL_HALF_FLOAT:
+        depth = CV_16F;
+        break;
+
      default:
          CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
      }
@@ -6676,9 +6678,23 @@ void convertFromImage(void* cl_mem_image, UMat& dst)
      switch (fmt.image_channel_order)
      {
      case CL_R:
+    case CL_A:
+    case CL_INTENSITY:
+    case CL_LUMINANCE:
          type = CV_MAKE_TYPE(depth, 1);
          break;
  
+    case CL_RG:
+    case CL_RA:
+        type = CV_MAKE_TYPE(depth, 2);
+        break;
+
+    // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
+    // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
+    /*case CL_RGB:
+        type = CV_MAKE_TYPE(depth, 3);
+        break;*/
+
      case CL_RGBA:
      case CL_BGRA:
      case CL_ARGB:
@@ -7068,6 +7084,13 @@ static std::string kerToStr(const Mat & k)
              stream << "DIG(" << data[i] << "f)";
          stream << "DIG(" << data[width] << "f)";
      }
+    else if (depth == CV_16F)
+    {
+        stream.setf(std::ios_base::showpoint);
+        for (int i = 0; i < width; ++i)
+            stream << "DIG(" << (float)data[i] << "h)";
+        stream << "DIG(" << (float)data[width] << "h)";
+    }
      else
      {
          for (int i = 0; i < width; ++i)
@@ -7091,7 +7114,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
  
      typedef std::string (* func_t)(const Mat &);
      static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
-                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
      const func_t func = funcs[ddepth];
      CV_Assert(func != 0);
  
@@ -7130,14 +7153,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
      int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
          d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
          d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
-        d.preferredVectorWidthDouble(), -1 };
+        d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() };
  
      // if the device says don't use vectors
      if (vectorWidths[0] == 1)
      {
          // it's heuristic
          vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
-        vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
+        vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2;
          vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
      }
  
@@ -7225,10 +7248,12 @@ struct Image2D::Impl
      {
          cl_image_format format;
          static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
-                                       CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
+                                       CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT };
          static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
                                                  CL_SNORM_INT16, -1, -1, -1, -1 };
-        static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
+        // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
+        // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
+        static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA };
  
          int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
          int channelOrder = channelOrders[cn];
diff --git a/modules/ts/src/ocl_perf.cpp b/modules/ts/src/ocl_perf.cpp

index 8dacf219f64be93e4b2daa1dcc2e906be3e41293..fe521f2c00d988bc76f67a61ae6b801c3d779442 100644 (file)
--- a/modules/ts/src/ocl_perf.cpp
+++ b/modules/ts/src/ocl_perf.cpp
@@ -70,7 +70,7 @@ void randu(InputOutputArray dst)
          cv::randu(dst, -128, 128);
      else if (dst.depth() == CV_16U)
          cv::randu(dst, 0, 1024);
-    else if (dst.depth() == CV_32F || dst.depth() == CV_64F)
+    else if (dst.depth() == CV_32F || dst.depth() == CV_64F || dst.depth() == CV_16F)
          cv::randu(dst, -1.0, 1.0);
      else if (dst.depth() == CV_16S || dst.depth() == CV_32S)
          cv::randu(dst, -4096, 4096);
diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp

index 2a9169fd13a57b43ec2e2a896bbd4a4d9507218c..5a42ca01cdc4f9735962084b8a7baa2775bfb3d6 100644 (file)
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@@ -1297,7 +1297,7 @@ void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype)
                  cv::randu(a, -128, 128);
              else if (depth == CV_16U)
                  cv::randu(a, 0, 1024);
-            else if (depth == CV_32F || depth == CV_64F)
+            else if (depth == CV_32F || depth == CV_64F || depth == CV_16F)
                  cv::randu(a, -1.0, 1.0);
              else if (depth == CV_16S || depth == CV_32S)
                  cv::randu(a, -4096, 4096);
author	Joe Howse <josephhowse@nummist.com>
	Mon, 21 Jun 2021 03:46:32 +0000 (00:46 -0300)
committer	Joe Howse <josephhowse@nummist.com>
	Wed, 30 Jun 2021 17:14:37 +0000 (14:14 -0300)
modules/core/include/opencv2/core/opencl/opencl_info.hpp		patch | blob | history
modules/core/src/ocl.cpp		patch | blob | history
modules/ts/src/ocl_perf.cpp		patch | blob | history
modules/ts/src/ts_perf.cpp		patch | blob | history