The previous names took the shift amount in bits to match gcc and required a multiply by 8 in the header. This creates a misleading error message when we check the range of the immediate to the builtin since the allowed range also got multiplied by 8.
This commit changes the builtins to use a byte shift amount to match the underlying instruction and the Intel intrinsic.
Fixes the remaining issue from PR37795.
llvm-svn: 334773
TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi128, "V2LLiV2LLiIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi128, "V2LLiV2LLiIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_monitor, "vv*UiUi", "n", "sse3")
TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3")
TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi256, "V4LLiV4LLiIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi256, "V4LLiV4LLiIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pslldqi512, "V8LLiV8LLiIi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrldqi512, "V8LLiV8LLiIi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", "avx512f")
"vperm");
}
- case X86::BI__builtin_ia32_pslldqi128:
- case X86::BI__builtin_ia32_pslldqi256:
- case X86::BI__builtin_ia32_pslldqi512: {
- // Shift value is in bits so divide by 8.
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
llvm::Type *ResultType = Ops[0]->getType();
// Builtin type is vXi64 so multiply by 8 to get bytes.
unsigned NumElts = ResultType->getVectorNumElements() * 8;
"pslldq");
return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
}
- case X86::BI__builtin_ia32_psrldqi128:
- case X86::BI__builtin_ia32_psrldqi256:
- case X86::BI__builtin_ia32_psrldqi512: {
- // Shift value is in bits so divide by 8.
- unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
llvm::Type *ResultType = Ops[0]->getType();
// Builtin type is vXi64 so multiply by 8 to get bytes.
unsigned NumElts = ResultType->getVectorNumElements() * 8;
}
#define _mm256_slli_si256(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256((__v4di)(__m256i)(a), (int)(imm) * 8)
+ (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
#define _mm256_bslli_epi128(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256((__v4di)(__m256i)(a), (int)(imm) * 8)
+ (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_slli_epi16(__m256i __a, int __count)
}
#define _mm256_srli_si256(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (int)(imm) * 8)
+ (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
#define _mm256_bsrli_epi128(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (int)(imm) * 8)
+ (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_srli_epi16(__m256i __a, int __count)
}
#define _mm512_bslli_epi128(a, imm) \
- (__m512i)__builtin_ia32_pslldqi512((__v8di)(__m512i)(a), (int)(imm) * 8)
+ (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi16(__m512i __A, __m512i __B)
}
#define _mm512_bsrli_epi128(a, imm) \
- (__m512i)__builtin_ia32_psrldqi512((__v8di)(__m512i)(a), (int)(imm) * 8)
+ (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
/// \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(a), (int)(imm) * 8)
+ (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
#define _mm_bslli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(a), (int)(imm) * 8)
+ (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
/// Left-shifts each 16-bit value in the 128-bit integer vector operand
/// by the specified number of bits. Low-order bits are cleared.
/// \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(a), (int)(imm) * 8)
+ (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
#define _mm_bsrli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(a), (int)(imm) * 8)
+ (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
/// Right-shifts each of 16-bit values in the 128-bit integer vector
/// operand by the specified number of bits. High-order bits are cleared.
case X86::BI__builtin_ia32_fpclasspd512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift:
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift:
i = 1; l = 0; u = 255;
break;
case X86::BI__builtin_ia32_vperm2f128_pd256:
case X86::BI__builtin_ia32_rndscaless_round_mask:
i = 4; l = 0; u = 255;
break;
- case X86::BI__builtin_ia32_pslldqi128:
- case X86::BI__builtin_ia32_pslldqi256:
- case X86::BI__builtin_ia32_pslldqi512:
- case X86::BI__builtin_ia32_psrldqi128:
- case X86::BI__builtin_ia32_psrldqi256:
- case X86::BI__builtin_ia32_psrldqi512:
- i = 1; l = 0; u = 2047;
- break;
}
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
}