From: Khem Raj
Date: Wed, 20 Sep 2017 01:07:35 +0000 (-0700)
Subject: Do not enable asm with clang
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~595^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2c7a123ed877fb88e420dff4962f89086891105d;p=platform%2Fupstream%2Fopencv.git

Do not enable asm with clang

clang pretends to be gcc 4.2.0, so the GCC version checks below would
select the inline-asm paths on clang for no reason, instead of the
builtins clang supports where possible. Exclude clang from these
checks explicitly.

Signed-off-by: Khem Raj
---

diff --git a/3rdparty/carotene/src/channel_extract.cpp b/3rdparty/carotene/src/channel_extract.cpp
index 8238a3e..ff4fb37 100644
--- a/3rdparty/carotene/src/channel_extract.cpp
+++ b/3rdparty/carotene/src/channel_extract.cpp
@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
                        srcStride == dst2Stride && \
                        srcStride == dst3Stride &&
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 
 #define SPLIT_ASM2(sgn, bits) __asm__ ( \
     "vld2." #bits " {d0, d2}, [%[in0]]  \n\t" \
@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
         } \
     }
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 
 #define ALPHA_QUAD(sgn, bits) { \
     internal::prefetch(src + sj); \
diff --git a/3rdparty/carotene/src/channels_combine.cpp b/3rdparty/carotene/src/channels_combine.cpp
index fc98fb9..5d9251d 100644
--- a/3rdparty/carotene/src/channels_combine.cpp
+++ b/3rdparty/carotene/src/channels_combine.cpp
@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
                        dstStride == src2Stride && \
                        dstStride == src3Stride &&
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 
 #define MERGE_ASM2(sgn, bits) __asm__ ( \
     "vld1." #bits " {d0-d1}, [%[in0]]  \n\t" \
diff --git a/3rdparty/carotene/src/colorconvert.cpp b/3rdparty/carotene/src/colorconvert.cpp
index 38cf75c..a191633 100644
--- a/3rdparty/carotene/src/colorconvert.cpp
+++ b/3rdparty/carotene/src/colorconvert.cpp
@@ -97,7 +97,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
     const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
     const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
     register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
     register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
@@ -116,7 +116,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
         u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
         size_t sj = 0u, dj = 0u;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
         for (; dj < roiw8; sj += 24, dj += 8)
         {
             internal::prefetch(src + sj);
@@ -198,7 +198,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
     const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
     const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
     register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
     register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
@@ -217,7 +217,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
         u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
         size_t sj = 0u, dj = 0u;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
         for (; dj < roiw8; sj += 32, dj += 8)
        {
             internal::prefetch(src + sj);
@@ -300,7 +300,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
     const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
     const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
     register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
     register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
@@ -319,7 +319,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
         u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
         size_t sj = 0u, dj = 0u;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
         for (; dj < roiw8; sj += 24, dj += 8)
         {
             internal::prefetch(src + sj);
@@ -402,7 +402,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
     const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
     const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
     register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
     register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
@@ -421,7 +421,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
         u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
         size_t sj = 0u, dj = 0u;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
         for (; dj < roiw8; sj += 32, dj += 8)
         {
             internal::prefetch(src + sj);
@@ -512,7 +512,7 @@ void gray2rgb(const Size2D &size,
         for (; sj < roiw16; sj += 16, dj += 48)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld1.8 {d0-d1}, [%[in0]]  \n\t"
                 "vmov.8 q1, q0             \n\t"
@@ -538,7 +538,7 @@
 
         if (sj < roiw8)
         {
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld1.8 {d0}, [%[in]]  \n\t"
                 "vmov.8 d1, d0         \n\t"
@@ -584,7 +584,7 @@ void gray2rgbx(const Size2D &size,
     size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register uint8x16_t vc255 asm ("q4") = vmovq_n_u8(255);
 #else
     uint8x16x4_t vRgba;
@@ -602,7 +602,7 @@
         for (; sj < roiw16; sj += 16, dj += 64)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld1.8 {d0-d1}, [%[in0]]  \n\t"
                 "vmov.8 q1, q0             \n\t"
@@ -628,7 +628,7 @@
 
         if (sj < roiw8)
         {
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld1.8 {d5}, [%[in]]  \n\t"
                 "vmov.8 d6, d5         \n\t"
@@ -1409,7 +1409,7 @@ inline void convertToHSV(const s32 r, const s32 g, const s32 b,
         "d24","d25","d26","d27","d28","d29","d30","d31" \
     );
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 
 #define YCRCB_CONSTS \
     register int16x4_t vcYR asm ("d31") = vmov_n_s16(4899); \
@@ -1555,7 +1555,7 @@ inline uint8x8x3_t convertToYCrCb( const int16x8_t& vR, const int16x8_t& vG, con
 #define COEFF_G ( 8663)
 #define COEFF_B (-17705)
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define YUV420ALPHA3_CONST
 #define YUV420ALPHA4_CONST register uint8x16_t c255 asm ("q13") = vmovq_n_u8(255);
 #define YUV420ALPHA3_CONVERT
@@ -1852,7 +1852,7 @@ void rgb2hsv(const Size2D &size,
 #ifdef CAROTENE_NEON
     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
     const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register const f32 vsdiv_table = f32(255 << hsv_shift);
     register f32 vhdiv_table = f32(hrange << hsv_shift);
    register const s32 vhrange = hrange;
@@ -1871,7 +1871,7 @@ void rgb2hsv(const Size2D &size,
         for (; j < roiw8; sj += 24, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d0, d2)
 #else
             uint8x8x3_t vRgb = vld3_u8(src + sj);
@@ -1904,7 +1904,7 @@ void rgbx2hsv(const Size2D &size,
 #ifdef CAROTENE_NEON
     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
     const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register const f32 vsdiv_table = f32(255 << hsv_shift);
     register f32 vhdiv_table = f32(hrange << hsv_shift);
     register const s32 vhrange = hrange;
@@ -1923,7 +1923,7 @@ void rgbx2hsv(const Size2D &size,
         for (; j < roiw8; sj += 32, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d0, d2)
 #else
             uint8x8x4_t vRgb = vld4_u8(src + sj);
@@ -1956,7 +1956,7 @@ void bgr2hsv(const Size2D &size,
 #ifdef CAROTENE_NEON
     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
     const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register const f32 vsdiv_table = f32(255 << hsv_shift);
     register f32 vhdiv_table = f32(hrange << hsv_shift);
     register const s32 vhrange = hrange;
@@ -1975,7 +1975,7 @@ void bgr2hsv(const Size2D &size,
         for (; j < roiw8; sj += 24, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d2, d0)
 #else
             uint8x8x3_t vRgb = vld3_u8(src + sj);
@@ -2008,7 +2008,7 @@ void bgrx2hsv(const Size2D &size,
 #ifdef CAROTENE_NEON
     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
     const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register const f32 vsdiv_table = f32(255 << hsv_shift);
     register f32 vhdiv_table = f32(hrange << hsv_shift);
     register const s32 vhrange = hrange;
@@ -2027,7 +2027,7 @@ void bgrx2hsv(const Size2D &size,
         for (; j < roiw8; sj += 32, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d2, d0)
 #else
             uint8x8x4_t vRgb = vld4_u8(src + sj);
@@ -2068,7 +2068,7 @@ void rgbx2bgr565(const Size2D &size,
         for (; j < roiw16; sj += 64, dj += 32, j += 16)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld4.8 {d2, d4, d6, d8}, [%[in0]] @ q0 q1 q2 q3 q4  \n\t"
                 "vld4.8 {d3, d5, d7, d9}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx  \n\t"
@@ -2122,7 +2122,7 @@ void rgb2bgr565(const Size2D &size,
         for (; j < roiw16; sj += 48, dj += 32, j += 16)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld3.8 {d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 q4  \n\t"
                 "vld3.8 {d3, d5, d7}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx  \n\t"
@@ -2176,7 +2176,7 @@ void rgbx2rgb565(const Size2D &size,
         for (; j < roiw16; sj += 64, dj += 32, j += 16)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld4.8 {d0, d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3  \n\t"
                 "vld4.8 {d1, d3, d5, d7}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB aaaaAAAA  \n\t"
@@ -2230,7 +2230,7 @@ void rgb2rgb565(const Size2D &size,
         for (; j < roiw16; sj += 48, dj += 32, j += 16)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld3.8 {d0, d2, d4}, [%[in0]] @ q0 q1 q2 q3  \n\t"
                 "vld3.8 {d1, d3, d5}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx  \n\t"
@@ -2285,7 +2285,7 @@ void rgb2ycrcb(const Size2D &size,
         for (; j < roiw8; sj += 24, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTTOYCRCB(vld3.8 {d0-d2}, d0, d1, d2)
 #else
             uint8x8x3_t vRgb = vld3_u8(src + sj);
@@ -2329,7 +2329,7 @@ void rgbx2ycrcb(const Size2D &size,
         for (; j < roiw8; sj += 32, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTTOYCRCB(vld4.8 {d0-d3}, d0, d1, d2)
 #else
             uint8x8x4_t vRgba = vld4_u8(src + sj);
@@ -2373,7 +2373,7 @@ void bgr2ycrcb(const Size2D &size,
         for (; j < roiw8; sj += 24, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTTOYCRCB(vld3.8 {d0-d2}, d2, d1, d0)
 #else
             uint8x8x3_t vBgr = vld3_u8(src + sj);
@@ -2417,7 +2417,7 @@ void bgrx2ycrcb(const Size2D &size,
         for (; j < roiw8; sj += 32, dj += 24, j += 8)
         {
             internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTTOYCRCB(vld4.8 {d0-d3}, d2, d1, d0)
 #else
             uint8x8x4_t vBgra = vld4_u8(src + sj);
@@ -2499,7 +2499,7 @@ void yuv420sp2rgb(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(3, d1, d0, q5, q6)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2545,7 +2545,7 @@ void yuv420sp2rgbx(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(4, d1, d0, q5, q6)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2591,7 +2591,7 @@ void yuv420i2rgb(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(3, d0, d1, q5, q6)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2637,7 +2637,7 @@ void yuv420i2rgbx(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(4, d0, d1, q5, q6)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2683,7 +2683,7 @@ void yuv420sp2bgr(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(3, d1, d0, q6, q5)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2729,7 +2729,7 @@ void yuv420sp2bgrx(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(4, d1, d0, q6, q5)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2775,7 +2775,7 @@ void yuv420i2bgr(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(3, d0, d1, q6, q5)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
@@ -2821,7 +2821,7 @@ void yuv420i2bgrx(const Size2D &size,
             internal::prefetch(uv + j);
             internal::prefetch(y1 + j);
             internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             CONVERTYUV420TORGB(4, d0, d1, q6, q5)
 #else
             convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
diff --git a/3rdparty/carotene/src/convert.cpp b/3rdparty/carotene/src/convert.cpp
index 64b6db7..f0c2d15 100644
--- a/3rdparty/carotene/src/convert.cpp
+++ b/3rdparty/carotene/src/convert.cpp
@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
     }
 })
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, u16, 16,
     register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, s32, 16,
     register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
     register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u8, f32, 16,
 ,
 {
@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
     }
 })
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, u16, 16,
     register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, s16, 16,
 ,
 {
@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, s32, 16,
 ,
 {
@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, f32, 16,
 ,
 {
@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, u8, 16,
 ,
 {
@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, s8, 16,
     register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
 {
@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s16, 8,
     register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
 {
@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s32, 8,
     register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
 {
@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, f32, 8,
 ,
 {
@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, u8, 16,
 ,
 {
@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s8, 16,
 ,
 {
@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s16, u16, 8,
     register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
 {
@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s32, 8,
 ,
 {
@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, f32, 8,
 ,
 {
@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u8, 8,
 ,
 {
@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s8, 8,
 ,
 {
@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u16, 8,
 ,
 {
@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s16, 8,
 ,
 {
@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, f32, 8,
 ,
 {
@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u8, 8,
     register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
     register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s8, 8,
     register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u16, 8,
     register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s16, 8,
     register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s32, 8,
     register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
diff --git a/3rdparty/carotene/src/convert_scale.cpp b/3rdparty/carotene/src/convert_scale.cpp
index ae41a98..d599d24 100644
--- a/3rdparty/carotene/src/convert_scale.cpp
+++ b/3rdparty/carotene/src/convert_scale.cpp
@@ -473,7 +473,7 @@ CVTS_FUNC(u8, s16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u8, s32, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -562,7 +562,7 @@ CVTS_FUNC(u8, s32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u8, f32, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
@@ -985,7 +985,7 @@ CVTS_FUNC(s8, s16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s8, s32, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1074,7 +1074,7 @@ CVTS_FUNC(s8, s32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s8, f32, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
@@ -1155,7 +1155,7 @@ CVTS_FUNC(s8, f32, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u16, u8, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1214,7 +1214,7 @@ CVTS_FUNC(u16, u8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u16, s8, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1273,7 +1273,7 @@ CVTS_FUNC(u16, s8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC1(u16, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1330,7 +1330,7 @@ CVTS_FUNC1(u16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u16, s16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1387,7 +1387,7 @@ CVTS_FUNC(u16, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u16, s32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1443,7 +1443,7 @@ CVTS_FUNC(u16, s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(u16, f32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
@@ -1495,7 +1495,7 @@ CVTS_FUNC(u16, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s16, u8, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1554,7 +1554,7 @@ CVTS_FUNC(s16, u8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s16, s8, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1613,7 +1613,7 @@ CVTS_FUNC(s16, s8, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s16, u16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1670,7 +1670,7 @@ CVTS_FUNC(s16, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC1(s16, 16,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1727,7 +1727,7 @@ CVTS_FUNC1(s16, 16,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s16, s32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1783,7 +1783,7 @@ CVTS_FUNC(s16, s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s16, f32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
@@ -1835,7 +1835,7 @@ CVTS_FUNC(s16, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s32, u8, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1893,7 +1893,7 @@ CVTS_FUNC(s32, u8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s32, s8, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -1951,7 +1951,7 @@ CVTS_FUNC(s32, s8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s32, u16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2007,7 +2007,7 @@ CVTS_FUNC(s32, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s32, s16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2063,7 +2063,7 @@ CVTS_FUNC(s32, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC1(s32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2118,7 +2118,7 @@ CVTS_FUNC1(s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(s32, f32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
@@ -2169,7 +2169,7 @@ CVTS_FUNC(s32, f32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(f32, u8, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)((1 << 16)*alpha));
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)((1 << 16)*beta));
@@ -2239,7 +2239,7 @@ CVTS_FUNC(f32, u8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(f32, s8, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2293,7 +2293,7 @@ CVTS_FUNC(f32, s8, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(f32, u16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2345,7 +2345,7 @@ CVTS_FUNC(f32, u16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(f32, s16, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2397,7 +2397,7 @@ CVTS_FUNC(f32, s16, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC(f32, s32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
@@ -2448,7 +2448,7 @@ CVTS_FUNC(f32, s32, 8,
 })
 #endif
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVTS_FUNC1(f32, 8,
     register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
     register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
diff --git a/3rdparty/carotene/src/gaussian_blur.cpp b/3rdparty/carotene/src/gaussian_blur.cpp
index f7b5f18..e5aa8fc 100644
--- a/3rdparty/carotene/src/gaussian_blur.cpp
+++ b/3rdparty/carotene/src/gaussian_blur.cpp
@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
                 u16* lidx1 = lane + x - 1*2;
                 u16* lidx3 = lane + x + 1*2;
                 u16* lidx4 = lane + x + 2*2;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
                 __asm__ __volatile__ (
                     "vld2.16 {d0, d2}, [%[in0]]!  \n\t"
                     "vld2.16 {d1, d3}, [%[in0]]   \n\t"
diff --git a/3rdparty/carotene/src/pyramid.cpp b/3rdparty/carotene/src/pyramid.cpp
index 4aaab6d..3e68fee 100644
--- a/3rdparty/carotene/src/pyramid.cpp
+++ b/3rdparty/carotene/src/pyramid.cpp
@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
         for (; x < roiw8; x += 8)
         {
             internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld2.16 {d0-d3}, [%[in0]]  \n\t"
                 "vld2.16 {d4-d7}, [%[in4]]  \n\t"
@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
         for (; x < roiw4; x += 4)
        {
             internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ (
                 "vld2.32 {d0-d3}, [%[in0]]  \n\t"
                 "vld2.32 {d4-d7}, [%[in4]]  \n\t"
@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
     std::vector<f32> _buf(cn*(srcSize.width + 4) + 32/sizeof(f32));
     f32* lane = internal::alignPtr(&_buf[2*cn], 32);
 
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
     register float32x4_t vc6d4f32 asm ("q11") = vmovq_n_f32(1.5f);  // 6/4
     register float32x4_t vc1d4f32 asm ("q12") = vmovq_n_f32(0.25f); // 1/4
 
@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
         for (; x < roiw4; x += 4)
         {
             internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
             __asm__ __volatile__ (
                 "vld2.32 {d0-d3}, [%[in0]]   \n\t"
                 "vld2.32 {d8-d11}, [%[in4]]  \n\t"
diff --git a/3rdparty/carotene/src/scharr.cpp b/3rdparty/carotene/src/scharr.cpp
index 8d3b632..36f6b22 100644
--- a/3rdparty/carotene/src/scharr.cpp
+++ b/3rdparty/carotene/src/scharr.cpp
@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
         internal::prefetch(srow0 + x);
         internal::prefetch(srow1 + x);
         internal::prefetch(srow2 + x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
         __asm__ (
             "vld1.8 {d0}, [%[src0]]  \n\t"
             "vld1.8 {d2}, [%[src2]]  \n\t"
@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
         x = 0;
         for( ; x < roiw8; x += 8 )
         {
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
             __asm__ (
                 "vld1.16 {d4-d5}, [%[s2ptr]]  \n\t"
                 "vld1.16 {d8-d9}, [%[s4ptr]]  \n\t"
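
For reference, a minimal standalone sketch (not part of the patch) of the
guard pattern the commit applies throughout. By default clang defines
__GNUC__ == 4 and __GNUC_MINOR__ == 2 for GCC compatibility, so a bare
"GCC 4.x older than 4.6/4.7" test also matches clang; the added
!defined(__clang__) term keeps clang on the intrinsics path. The file name
and printed strings below are illustrative assumptions only:

    // guard_demo.cpp - report which path the fixed guard selects
    // on the current compiler.
    #include <cstdio>

    int main()
    {
    // Same condition as the patched carotene guards: only real GCC 4.x
    // before 4.7, targeting 32-bit ARM, keeps the inline-asm path.
    #if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && \
        __GNUC_MINOR__ < 7 && !defined(__clang__)
        std::printf("inline-asm path (real GCC < 4.7)\n");
    #else
        // clang lands here even though it reports __GNUC__ == 4,
        // __GNUC_MINOR__ == 2, because __clang__ is defined.
        std::printf("intrinsics path (clang, newer GCC, or AArch64)\n");
    #endif
        return 0;
    }

Built with "clang++ guard_demo.cpp", this prints the intrinsics line;
without the !defined(__clang__) term it would print the inline-asm line,
which is exactly the misselection this commit removes.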