core: fix F16C compilation check

author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Wed, 11 Nov 2020 17:57:53 +0000 (17:57 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Tue, 17 Nov 2020 12:22:49 +0000 (12:22 +0000)
diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h

index 42651ae..540fbb6 100644 (file)
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -216,6 +216,11 @@ struct VZeroUpperGuard {
  #  define CV_VSX 1
  #endif
  
+#ifdef __F16C__
+#  include <immintrin.h>
+#  define CV_FP16 1
+#endif
+
  #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
  
  
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h

index 5bd3af3..6488b8b 100644 (file)
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -765,7 +765,7 @@ protected:
      float16_t() {}
      explicit float16_t(float x)
      {
-    #if CV_AVX2
+    #if CV_FP16
          __m128 v = _mm_load_ss(&x);
          w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
      #else
@@ -796,7 +796,7 @@ protected:
  
      operator float() const
      {
-    #if CV_AVX2
+    #if CV_FP16
          float f;
          _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
          return f;
diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp

index 5dc5bb5..54e8927 100644 (file)
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@@ -3121,18 +3121,39 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un
  OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
  OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
  
+//
  // FP16
+//
+
  inline v_float32x8 v256_load_expand(const float16_t* ptr)
  {
+#if CV_FP16
      return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
+#else
+    float CV_DECL_ALIGNED(32) buf[8];
+    for (int i = 0; i < 8; i++)
+        buf[i] = (float)ptr[i];
+    return v256_load_aligned(buf);
+#endif
  }
  
  inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
  {
+#if CV_FP16
      __m128i ah = _mm256_cvtps_ph(a.val, 0);
      _mm_storeu_si128((__m128i*)ptr, ah);
+#else
+    float CV_DECL_ALIGNED(32) buf[8];
+    v_store_aligned(buf, a);
+    for (int i = 0; i < 8; i++)
+        ptr[i] = float16_t(buf[i]);
+#endif
  }
  
+//
+// end of FP16
+//
+
  inline void v256_cleanup() { _mm256_zeroall(); }
  
  CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp

index a16a1a8..4af5533 100644 (file)
--- a/modules/core/src/convert.simd.hpp
+++ b/modules/core/src/convert.simd.hpp
@@ -5,6 +5,11 @@
  #include "precomp.hpp"
  #include "convert.hpp"
  
+#if !defined(OPENCV_SUPRESS_WARNING_AVX2_WITHOUT_FP16C) && \
+    (defined(__GNUC__) && defined(__AVX2__) && !defined(__F16C__))
+#warning "Non-optimal compiler flags: AVX2 without FP16. Generated code is very slow. Consider adding '-mf16c' compiler option."
+#endif
+
  namespace cv {
  CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
  
diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp

index 321fa64..71d61e1 100644 (file)
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -126,9 +126,11 @@ DEFINE_SIMD_TESTS(256, AVX512_SKX)
  
  TEST(hal_intrin256, float16x16_FP16)
  {
+#if CV_TRY_FP16
      //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
      CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif
+    throw SkipTestException("Unsupported: FP16 is not available");
  }
  
  
@@ -142,8 +144,10 @@ namespace intrin512 {
  
  TEST(hal_intrin512, float16x32_FP16)
  {
+#if CV_TRY_FP16
      CV_CPU_CALL_AVX512_SKX_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif
+    throw SkipTestException("Unsupported: FP16 is not available");
  }
  
  
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp

index 6731091..84da496 100644 (file)
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1902,21 +1902,21 @@ void test_hal_intrin_float64()
  #endif
  }
  
-#if CV_FP16
  void test_hal_intrin_float16()
  {
      DUMP_ENTRY(v_float16);
  #if CV_FP16
      TheTest<v_float32>()
          .test_loadstore_fp16_f32()
-#endif
  #if CV_SIMD_FP16
          .test_loadstore_fp16()
          .test_float_cvt_fp16()
  #endif
          ;
-}
+#else
+    std::cout << "SKIP: CV_FP16 is not available" << std::endl;
  #endif
+}
  
  /*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
  void test_hal_intrin_float16()
author	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Wed, 11 Nov 2020 17:57:53 +0000 (17:57 +0000)
committer	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Tue, 17 Nov 2020 12:22:49 +0000 (12:22 +0000)
modules/core/include/opencv2/core/cv_cpu_dispatch.h		patch \| blob \| history
modules/core/include/opencv2/core/cvdef.h		patch \| blob \| history
modules/core/include/opencv2/core/hal/intrin_avx.hpp		patch \| blob \| history
modules/core/src/convert.simd.hpp		patch \| blob \| history
modules/core/test/test_intrin.cpp		patch \| blob \| history
modules/core/test/test_intrin_utils.hpp		patch \| blob \| history