From a65bf65e0bb4f2c119bb388e7f036e664d108669 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 28 Aug 2018 22:32:14 +0000 Subject: [PATCH] [X86] Add kadd intrinsics to match gcc and icc. This adds the following intrinsics: _kadd_mask64 _kadd_mask32 _kadd_mask16 _kadd_mask8 These are missing from the Intel Intrinsics Guide, but are implemented by both gcc and icc. llvm-svn: 340879 --- clang/include/clang/Basic/BuiltinsX86.def | 4 ++++ clang/lib/CodeGen/CGBuiltin.cpp | 30 +++++++++++++++++++++++++++++- clang/lib/Headers/avx512bwintrin.h | 12 ++++++++++++ clang/lib/Headers/avx512dqintrin.h | 12 ++++++++++++ clang/test/CodeGen/avx512bw-builtins.c | 22 ++++++++++++++++++++++ clang/test/CodeGen/avx512dq-builtins.c | 22 ++++++++++++++++++++++ 6 files changed, 101 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index ce2288e..f227584 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1737,6 +1737,10 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kadddi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 116863a..7bb1a65 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8613,7 +8613,7 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, LHS = CGF.Builder.CreateNot(LHS); return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), - CGF.Builder.getIntNTy(std::max(NumElts, 8U))); + Ops[0]->getType()); } static Value *EmitX86Select(CodeGenFunction &CGF, @@ -10031,6 +10031,34 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } + case X86::BI__builtin_ia32_kaddqi: + case X86::BI__builtin_ia32_kaddhi: + case X86::BI__builtin_ia32_kaddsi: + case X86::BI__builtin_ia32_kadddi: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_kaddqi: + IID = Intrinsic::x86_avx512_kadd_b; + break; + case X86::BI__builtin_ia32_kaddhi: + IID = Intrinsic::x86_avx512_kadd_w; + break; + case X86::BI__builtin_ia32_kaddsi: + IID = Intrinsic::x86_avx512_kadd_d; + break; + case X86::BI__builtin_ia32_kadddi: + IID = Intrinsic::x86_avx512_kadd_q; + break; + } + + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); + Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); + Function *Intr = CGM.getIntrinsic(IID); + Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); + return Builder.CreateBitCast(Res, Ops[0]->getType()); + } case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index a08833b..fdf2676 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -143,6 +143,18 @@ _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kadd_mask32(__mmask32 __A, __mmask32 __B) +{ + return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kadd_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); +} + /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 563e94b..a9a3579 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -86,6 +86,18 @@ _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); } +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_kadd_mask8(__mmask8 __A, __mmask8 __B) +{ + return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_kadd_mask16(__mmask16 __A, __mmask16 __B) +{ + return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); +} + static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index 808b1c0..f1707a1 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -226,6 +226,28 @@ unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m5 _mm512_cmpneq_epu8_mask(__C, __D), CF); } +__mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kadd_mask32 + // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> [[LHS]], <32 x i1> [[RHS]]) + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kadd_mask32(_mm512_cmpneq_epu16_mask(__A, __B), + _mm512_cmpneq_epu16_mask(__C, __D)), + __E, __F); +} + +__mmask64 test_kadd_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kadd_mask64 + // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> [[LHS]], <64 x i1> [[RHS]]) + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kadd_mask64(_mm512_cmpneq_epu8_mask(__A, __B), + _mm512_cmpneq_epu8_mask(__C, __D)), + __E, __F); +} + __mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { // CHECK-LABEL: @test_mm512_cmpeq_epi8_mask // CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c index ee339a0..f8f3e4b 100644 --- a/clang/test/CodeGen/avx512dq-builtins.c +++ b/clang/test/CodeGen/avx512dq-builtins.c @@ -114,6 +114,28 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51 _mm512_cmpneq_epu64_mask(__C, __D), CF); } +__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kadd_mask8 + // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]]) + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kadd_mask8(_mm512_cmpneq_epu64_mask(__A, __B), + _mm512_cmpneq_epu64_mask(__C, __D)), + __E, __F); +} + +__mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { + // CHECK-LABEL: @test_kadd_mask16 + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]]) + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kadd_mask16(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); +} + __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mullo_epi64 // CHECK: mul <8 x i64> -- 2.7.4