const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 24, dj += 8)
{
internal::prefetch(src + sj);
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 32, dj += 8)
{
internal::prefetch(src + sj);
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 24, dj += 8)
{
internal::prefetch(src + sj);
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 32, dj += 8)
{
internal::prefetch(src + sj);
for (; sj < roiw16; sj += 16, dj += 48)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0-d1}, [%[in0]] \n\t"
"vmov.8 q1, q0 \n\t"
if (sj < roiw8)
{
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0}, [%[in]] \n\t"
"vmov.8 d1, d0 \n\t"
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register uint8x16_t vc255 asm ("q4") = vmovq_n_u8(255);
#else
uint8x16x4_t vRgba;
for (; sj < roiw16; sj += 16, dj += 64)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0-d1}, [%[in0]] \n\t"
"vmov.8 q1, q0 \n\t"
if (sj < roiw8)
{
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d5}, [%[in]] \n\t"
"vmov.8 d6, d5 \n\t"
"d24","d25","d26","d27","d28","d29","d30","d31" \
);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YCRCB_CONSTS \
register int16x4_t vcYR asm ("d31") = vmov_n_s16(4899); \
#define COEFF_G ( 8663)
#define COEFF_B (-17705)
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YUV420ALPHA3_CONST
#define YUV420ALPHA4_CONST register uint8x16_t c255 asm ("q13") = vmovq_n_u8(255);
#define YUV420ALPHA3_CONVERT
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d0, d2)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d0, d2)
#else
uint8x8x4_t vRgb = vld4_u8(src + sj);
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d2, d0)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d2, d0)
#else
uint8x8x4_t vRgb = vld4_u8(src + sj);
for (; j < roiw16; sj += 64, dj += 32, j += 16)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld4.8 {d2, d4, d6, d8}, [%[in0]] @ q0 q1 q2 q3 q4 \n\t"
"vld4.8 {d3, d5, d7, d9}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
for (; j < roiw16; sj += 48, dj += 32, j += 16)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld3.8 {d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 q4 \n\t"
"vld3.8 {d3, d5, d7}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
for (; j < roiw16; sj += 64, dj += 32, j += 16)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld4.8 {d0, d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 \n\t"
"vld4.8 {d1, d3, d5, d7}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB aaaaAAAA \n\t"
for (; j < roiw16; sj += 48, dj += 32, j += 16)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld3.8 {d0, d2, d4}, [%[in0]] @ q0 q1 q2 q3 \n\t"
"vld3.8 {d1, d3, d5}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld3.8 {d0-d2}, d0, d1, d2)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld4.8 {d0-d3}, d0, d1, d2)
#else
uint8x8x4_t vRgba = vld4_u8(src + sj);
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld3.8 {d0-d2}, d2, d1, d0)
#else
uint8x8x3_t vBgr = vld3_u8(src + sj);
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld4.8 {d0-d3}, d2, d1, d0)
#else
uint8x8x4_t vBgra = vld4_u8(src + sj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d1, d0, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d1, d0, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d0, d1, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d0, d1, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d1, d0, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d1, d0, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d0, d1, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d0, d1, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
}
})
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, s32, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u8, f32, 16,
,
{
}
})
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, s16, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, s32, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, f32, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, u8, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, s8, 16,
register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s16, 8,
register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s32, 8,
register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, f32, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, u8, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s8, 16,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s16, u16, 8,
register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s32, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, f32, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u8, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s8, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u16, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s16, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, f32, 8,
,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u8, 8,
register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s8, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s32, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u8, s32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u8, f32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s8, s32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s8, f32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, u8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(u16, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, u8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, s8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(s16, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, u8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, s8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, u8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)((1 << 16)*alpha));
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)((1 << 16)*beta));
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,