From: Vincent Rabaud Date: Thu, 15 Dec 2022 11:28:30 +0000 (+0100) Subject: Fix slower CV_PAUSE on SkyLake and above. X-Git-Tag: accepted/tizen/unified/20230127.161057~1^2~6^2~8^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b7b08fa0c3e5fd4fb656217210f33d9809c8efc5;p=platform%2Fupstream%2Fopencv.git Fix slower CV_PAUSE on SkyLake and above. This is fixing https://github.com/opencv/opencv/issues/22852 --- diff --git a/modules/core/src/parallel_impl.cpp b/modules/core/src/parallel_impl.cpp index 1d9690b04c..087b41233b 100644 --- a/modules/core/src/parallel_impl.cpp +++ b/modules/core/src/parallel_impl.cpp @@ -52,11 +52,14 @@ DECLARE_CV_PAUSE #endif #ifndef CV_PAUSE # if defined __GNUC__ && (defined __i386__ || defined __x86_64__) +# include <x86intrin.h> /* for __rdtsc */ # if !defined(__SSE2__) static inline void cv_non_sse_mm_pause() { __asm__ __volatile__ ("rep; nop"); } # define _mm_pause cv_non_sse_mm_pause # endif -# define CV_PAUSE(v) do { for (int __delay = (v); __delay > 0; --__delay) { _mm_pause(); } } while (0) +// 5 * v is meant for backward compatibility: with pre-Skylake CPUs, _mm_pause took 4 or 5 cycles. +// With post-Skylake CPUs, _mm_pause takes 140 cycles. +# define CV_PAUSE(v) do { const uint64_t __delay = 5 * v; uint64_t __init = __rdtsc(); do { _mm_pause(); } while ((__rdtsc() - __init) < __delay); } while (0) # elif defined __GNUC__ && defined __aarch64__ # define CV_PAUSE(v) do { for (int __delay = (v); __delay > 0; --__delay) { asm volatile("yield" ::: "memory"); } } while (0) # elif defined __GNUC__ && defined __arm__