From c330ca861139d6b43766885007069b0e05e15b72 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 27 Aug 2018 06:20:22 +0000 Subject: [PATCH] [X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers. This also adds a second intrinsic name for the 16-bit mask versions. These intrinsics match gcc and icc. They just aren't published in the Intel Intrinsics Guide so I only recently found they existed. llvm-svn: 340719 --- clang/include/clang/Basic/BuiltinsX86.def | 18 +++++ clang/lib/CodeGen/CGBuiltin.cpp | 44 +++++++--- clang/lib/Headers/avx512bwintrin.h | 72 +++++++++++++++++ clang/lib/Headers/avx512dqintrin.h | 38 +++++++++ clang/lib/Headers/avx512fintrin.h | 7 ++ clang/test/CodeGen/avx512bw-builtins.c | 130 ++++++++++++++++++++++++++++++ clang/test/CodeGen/avx512dq-builtins.c | 65 +++++++++++++++ clang/test/CodeGen/avx512f-builtins.c | 65 +++++++++++++++ 8 files changed, 427 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 98de5fa..fc5096d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512p TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf") TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") @@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a8ae7ed..46c073e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF, } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, - unsigned NumElts, ArrayRef Ops, + ArrayRef Ops, bool InvertLHS = false) { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); @@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_kortestchi: case X86::BI__builtin_ia32_kortestzhi: { - Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); Value *C; if (BuiltinID == X86::BI__builtin_ia32_kortestchi) C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); @@ -10023,26 +10024,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } + case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: - return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: + return EmitX86MaskLogic(*this, Instruction::And, Ops); + case X86::BI__builtin_ia32_kandnqi: case X86::BI__builtin_ia32_kandnhi: - return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: + return EmitX86MaskLogic(*this, Instruction::And, Ops, true); + case X86::BI__builtin_ia32_korqi: case X86::BI__builtin_ia32_korhi: - return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: + return EmitX86MaskLogic(*this, Instruction::Or, Ops); + case X86::BI__builtin_ia32_kxnorqi: case X86::BI__builtin_ia32_kxnorhi: - return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: + return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); + case X86::BI__builtin_ia32_kxorqi: case X86::BI__builtin_ia32_kxorhi: - return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); - case X86::BI__builtin_ia32_knothi: { - Ops[0] = getMaskVecValue(*this, Ops[0], 16); - return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), - Builder.getInt16Ty()); + case X86::BI__builtin_ia32_kxorsi: + case X86::BI__builtin_ia32_kxordi: + return EmitX86MaskLogic(*this, Instruction::Xor, Ops); + case X86::BI__builtin_ia32_knotqi: + case X86::BI__builtin_ia32_knothi: + case X86::BI__builtin_ia32_knotsi: + case X86::BI__builtin_ia32_knotdi: { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *Res = getMaskVecValue(*this, Ops[0], NumElts); + return Builder.CreateBitCast(Builder.CreateNot(Res), + Ops[0]->getType()); } case X86::BI__builtin_ia32_kunpckdi: case X86::BI__builtin_ia32_kunpcksi: case X86::BI__builtin_ia32_kunpckhi: { - unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits(); + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); uint32_t Indices[64]; diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 0f5b91f..6f39b2e 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -35,6 +35,78 @@ typedef unsigned long long __mmask64; #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +static __inline __mmask32 __DEFAULT_FN_ATTRS +_knot_mask32(__mmask32 __M) +{ + return __builtin_ia32_knotsi(__M); +} + +static __inline __mmask64 __DEFAULT_FN_ATTRS +_knot_mask64(__mmask64 __M) +{ + return __builtin_ia32_knotdi(__M); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kand_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kand_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kandn_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kandn_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kxnor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kxnor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kxor_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kxor_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); +} + /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 25165f2a..a887078 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -30,6 +30,43 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) + +static __inline __mmask8 __DEFAULT_FN_ATTRS +_knot_mask8(__mmask8 __M) +{ + return __builtin_ia32_knotqi(__M); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kand_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kandn_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kxnor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kxor_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); +} static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { @@ -1257,5 +1294,6 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__mmask8)(U)) #undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS #endif diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 86425b4..47cc86a 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8369,6 +8369,13 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B) return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } +#define _kand_mask16 _mm512_kand +#define _kandn_mask16 _mm512_kandn +#define _knot_mask16 _mm512_knot +#define _kor_mask16 _mm512_kor +#define _kxnor_mask16 _mm512_kxnor +#define _kxor_mask16 _mm512_kxor + static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512 (__m512i * __P, __m512i __A) { diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index 96f142c..7a3a7d0 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -4,6 +4,136 @@ #include +__mmask32 test_knot_mask32(__mmask32 a) { + // CHECK-LABEL: @test_knot_mask32 + // CHECK: [[IN:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], + // CHECK: bitcast <32 x i1> [[NOT]] to i32 + return _knot_mask32(a); +} + +__mmask64 test_knot_mask64(__mmask64 a) { + // CHECK-LABEL: @test_knot_mask64 + // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], + // CHECK: bitcast <64 x i1> [[NOT]] to i64 + return _knot_mask64(a); +} + +__mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kand_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = and <32 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kand_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kand_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = and <64 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kand_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + +__mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kandn_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = and <32 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kandn_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kandn_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = and <64 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kandn_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + +__mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kor_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = or <32 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kor_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kor_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = or <64 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kor_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + +__mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxnor_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kxnor_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxnor_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kxnor_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + +__mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxor_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = xor <32 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kxor_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kxor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxor_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = xor <64 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kxor_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + __mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { // CHECK-LABEL: @test_mm512_cmpeq_epi8_mask // CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c index 6ac3aff..0dd0049 100644 --- a/clang/test/CodeGen/avx512dq-builtins.c +++ b/clang/test/CodeGen/avx512dq-builtins.c @@ -3,6 +3,71 @@ #include +__mmask8 test_knot_mask8(__mmask8 a) { + // CHECK-LABEL: @test_knot_mask8 + // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], + // CHECK: bitcast <8 x i1> [[NOT]] to i8 + return _knot_mask8(a); +} + +__mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kand_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = and <8 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kand_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + +__mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kandn_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = and <8 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kandn_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + +__mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kor_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = or <8 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kor_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + +__mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxnor_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kxnor_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + +__mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxor_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = xor <8 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kxor_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mullo_epi64 // CHECK: mul <8 x i64> diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 17cbc62..326d6d44 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -8185,6 +8185,71 @@ __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +__mmask16 test_knot_mask16(__mmask16 a) { + // CHECK-LABEL: @test_knot_mask16 + // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: bitcast <16 x i1> [[NOT]] to i16 + return _knot_mask16(a); +} + +__mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kand_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kand_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + +__mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kandn_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + +__mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kor_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + +__mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxnor_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + +__mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kxor_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kxor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + void test_mm512_stream_si512(__m512i * __P, __m512i __A) { // CHECK-LABEL: @test_mm512_stream_si512 // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal -- 2.7.4