TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "ncV:256:", "avx2")
TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+// Emit binary intrinsic with the same type used in result/args.
+static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops, Intrinsic::ID IID) {
+ llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
+ return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
+}
+
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
Load->setVolatile(true);
return Load;
}
+ case X86::BI__builtin_ia32_paddsb512:
+ case X86::BI__builtin_ia32_paddsw512:
+ case X86::BI__builtin_ia32_paddsb256:
+ case X86::BI__builtin_ia32_paddsw256:
+ case X86::BI__builtin_ia32_paddsb128:
+ case X86::BI__builtin_ia32_paddsw128:
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat);
+ case X86::BI__builtin_ia32_paddusb512:
+ case X86::BI__builtin_ia32_paddusw512:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
+ case X86::BI__builtin_ia32_psubsb512:
+ case X86::BI__builtin_ia32_psubsw512:
+ case X86::BI__builtin_ia32_psubsb256:
+ case X86::BI__builtin_ia32_psubsw256:
+ case X86::BI__builtin_ia32_psubsb128:
+ case X86::BI__builtin_ia32_psubsw128:
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);
+ case X86::BI__builtin_ia32_psubusb512:
+ case X86::BI__builtin_ia32_psubusw512:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat);
case X86::BI__builtin_ia32_encodekey128_u32: {
Intrinsic::ID IID = Intrinsic::x86_encodekey128;
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_adds_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
+ return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_adds_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
+ return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_adds_epu8(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b);
+ return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_adds_epu16(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b);
+ return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
}
#define _mm256_alignr_epi8(a, b, n) \
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_subs_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b);
+ return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_subs_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b);
+ return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_subs_epu8(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b);
+ return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_subs_epu16(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b);
+ return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_adds_epi8 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
+ return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_adds_epi16 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
+ return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_adds_epu8 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
+ return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_adds_epu16 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
+ return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_subs_epi8 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
+ return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_subs_epi16 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
+ return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_subs_epu8 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
+ return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_subs_epu16 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
+ return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
+ return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
}
/// Adds, with saturation, the corresponding elements of two 128-bit
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
+ return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
}
/// Adds, with saturation, the corresponding elements of two 128-bit
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epu8(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b);
+ return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
}
/// Adds, with saturation, the corresponding elements of two 128-bit
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_adds_epu16(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b);
+ return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
}
/// Computes the rounded averages of corresponding elements of two
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
+ return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
}
/// Subtracts corresponding 16-bit signed integer values in the input and
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
+ return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
}
/// Subtracts corresponding 8-bit unsigned integer values in the input
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu8(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
+ return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
}
/// Subtracts corresponding 16-bit unsigned integer values in the input
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu16(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b);
+ return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
}
/// Performs a bitwise AND of two 128-bit integer vectors.