From: Paul E. Murphy Date: Thu, 12 Sep 2019 14:45:56 +0000 (-0500) Subject: core: workaround old gcc vec_mul{e,o} (Issue #15506) X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1^2~109^2~4^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b465c826963f9ea01d466f0281b568196e6ef933;p=platform%2Fupstream%2Fopencv.git core: workaround old gcc vec_mul{e,o} (Issue #15506) ISA 2.07 (aka POWER8) effectively extended the expanding multiply operation to word types. The AltiVec intrinsics prior to GCC 8 did not get the update. Work around this deficiency in the same way as the other fixes. This was exposed by commit 33fb253a66275abaa5060ef318c9a5cc87c5fd6e which leverages the int -> dword expanding multiply. This fixes Issue #15506. --- diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index c60da3c..0f1029d 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -124,6 +124,33 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \ #define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm) +#if __GNUG__ < 8 + + // Support for int4 -> dword2 expanding multiply was added in GCC 8. 
+    // Drop any existing vec_mule/vec_mulo macros so the names can be
+    // redefined below as overloaded inline functions.
+    #ifdef vec_mule
+        #undef vec_mule
+    #endif
+    #ifdef vec_mulo
+        #undef vec_mulo
+    #endif
+
+    // Overloads the compiler already handles: pass them to the builtins.
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_short8,   vec_char16,   vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_int4,     vec_short8,   vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_uint4,    vec_ushort8,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_short8,   vec_char16,   vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_int4,     vec_short8,   vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_uint4,    vec_ushort8,  vec_mulo, __builtin_vec_mulo)
+
+    // dword2 support arrived in ISA 2.07 and GCC 8+, so the word -> dword
+    // overloads are emitted as raw instructions instead of builtins.
+    //
+    // vec_mule/vec_mulo pick "even"/"odd" elements in the target's native
+    // element order, whereas vmule*/vmulo* always number elements in
+    // big-endian order. On little-endian targets the intrinsic therefore
+    // has to map to the opposite instruction, which is also what the
+    // compiler builtins used above do. Without the swap these overloads
+    // would return the other element pair than the narrower ones.
+    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulesw, vec_mule)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mule)
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulosw, vec_mulo)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mulo)
+    #else
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulosw, vec_mule)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mule)
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulesw, vec_mulo)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mulo)
+    #endif
+
+#endif
+
 #if __GNUG__ < 7
 // up to GCC 6 vec_mul only supports precisions and llong
 #   ifdef vec_mul