This matches gcc and icc despite not being documented in the Intel Intrinsics Guide.
llvm-svn: 340798
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestzhi: {
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
- Value *C;
- if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
- C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
- else
- C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
+}
+
/* Integer compare */
#define _mm512_cmp_epi8_mask(a, b, p) \
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
+}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A * (__v8du) __B);
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
+ *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
+ return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
+}
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
__E, __F);
}
+unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask32_u8
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
+ // CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[OR2:%.*]] = or <32 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <32 x i1> [[OR2]] to i32
+ // CHECK: [[CMP2:%.*]] = icmp eq i32 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D), CF);
+}
+
+unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask64_u8
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
+ // CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[OR2:%.*]] = or <64 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <64 x i1> [[OR2]] to i64
+ // CHECK: [[CMP2:%.*]] = icmp eq i64 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D), CF);
+}
+
__mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: @test_mm512_cmpeq_epi8_mask
// CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}
__E, __F);
}
+unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask8_u8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
+ // CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[OR2:%.*]] = or <8 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <8 x i1> [[OR2]] to i8
+ // CHECK: [[CMP2:%.*]] = icmp eq i8 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D), CF);
+}
+
__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mullo_epi64
// CHECK: mul <8 x i64>
_mm512_cmpneq_epu32_mask(__C, __D));
}
+unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestz_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
+ // CHECK-LABEL: @test_kortestc_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ return _kortestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
+}
+
+unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
+ // CHECK-LABEL: @test_kortest_mask16_u8
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+ // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+ // CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+ // CHECK: trunc i32 [[ZEXT]] to i8
+ // CHECK: [[LHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[OR2:%.*]] = or <16 x i1> [[LHS2]], [[RHS2]]
+ // CHECK: [[CAST2:%.*]] = bitcast <16 x i1> [[OR2]] to i16
+ // CHECK: [[CMP2:%.*]] = icmp eq i16 [[CAST2]], 0
+ // CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
+ // CHECK: trunc i32 [[ZEXT2]] to i8
+ return _kortest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D), CF);
+}
+
__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_mm512_kunpackb
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>