From 531ce2831193fae550826aec3fb5acdd73772f11 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 24 Oct 2016 04:04:24 +0000 Subject: [PATCH] [AVX-512] Replace 64-bit element and 512-bit vector pmin/pmax builtins with native IR like we do for 128/256-bit, but with the addition of masking. llvm-svn: 284956 --- clang/lib/CodeGen/CGBuiltin.cpp | 61 +++++++++++----- clang/test/CodeGen/avx512bw-builtins.c | 88 ++++++++++++++++------- clang/test/CodeGen/avx512f-builtins.c | 128 ++++++++++++++++++++++++++++----- clang/test/CodeGen/avx512vl-builtins.c | 88 ++++++++++++++++------- 4 files changed, 284 insertions(+), 81 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d787442..a81a75b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6979,6 +6979,18 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, std::max(NumElts, 8U))); } +static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, + ArrayRef Ops) { + Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); + Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + + if (Ops.size() == 2) + return Res; + + assert(Ops.size() == 4); + return EmitX86Select(CGF, Ops[3], Res, Ops[2]); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -7511,43 +7523,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[1]); } - // TODO: Handle 64/512-bit vector widths of min/max. case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsq128_mask: case X86::BI__builtin_ia32_pmaxsb256: case X86::BI__builtin_ia32_pmaxsw256: - case X86::BI__builtin_ia32_pmaxsd256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pmaxsd256: + case X86::BI__builtin_ia32_pmaxsq256_mask: + case X86::BI__builtin_ia32_pmaxsb512_mask: + case X86::BI__builtin_ia32_pmaxsw512_mask: + case X86::BI__builtin_ia32_pmaxsd512_mask: + case X86::BI__builtin_ia32_pmaxsq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); case X86::BI__builtin_ia32_pmaxub128: case X86::BI__builtin_ia32_pmaxuw128: case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxuq128_mask: case X86::BI__builtin_ia32_pmaxub256: case X86::BI__builtin_ia32_pmaxuw256: - case X86::BI__builtin_ia32_pmaxud256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pmaxud256: + case X86::BI__builtin_ia32_pmaxuq256_mask: + case X86::BI__builtin_ia32_pmaxub512_mask: + case X86::BI__builtin_ia32_pmaxuw512_mask: + case X86::BI__builtin_ia32_pmaxud512_mask: + case X86::BI__builtin_ia32_pmaxuq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); case X86::BI__builtin_ia32_pminsb128: case X86::BI__builtin_ia32_pminsw128: case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsq128_mask: case X86::BI__builtin_ia32_pminsb256: case X86::BI__builtin_ia32_pminsw256: - case X86::BI__builtin_ia32_pminsd256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pminsd256: + case X86::BI__builtin_ia32_pminsq256_mask: + case X86::BI__builtin_ia32_pminsb512_mask: + case X86::BI__builtin_ia32_pminsw512_mask: + case X86::BI__builtin_ia32_pminsd512_mask: + case X86::BI__builtin_ia32_pminsq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); case X86::BI__builtin_ia32_pminub128: case X86::BI__builtin_ia32_pminuw128: case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminuq128_mask: case X86::BI__builtin_ia32_pminub256: case X86::BI__builtin_ia32_pminuw256: - case X86::BI__builtin_ia32_pminud256: { - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); - return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - } + case X86::BI__builtin_ia32_pminud256: + case X86::BI__builtin_ia32_pminuq256_mask: + case X86::BI__builtin_ia32_pminub512_mask: + case X86::BI__builtin_ia32_pminuw512_mask: + case X86::BI__builtin_ia32_pminud512_mask: + case X86::BI__builtin_ia32_pminuq512_mask: + return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); // 3DNow! case X86::BI__builtin_ia32_pswapdsf: diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index c49dff6..3ec6c30 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -660,122 +660,162 @@ __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { } __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi8 - // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512 + // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] return _mm512_max_epi8(__A,__B); } __m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi8 - // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512 + // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epi8(__M,__A,__B); } __m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi8 - // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512 + // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epi8(__W,__M,__A,__B); } __m512i test_mm512_max_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epi16 - // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512 + // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] return _mm512_max_epi16(__A,__B); } __m512i test_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi16 - // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512 + // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epi16(__M,__A,__B); } __m512i test_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi16 - // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512 + // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epi16(__W,__M,__A,__B); } __m512i test_mm512_max_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu8 - // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512 + // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] return _mm512_max_epu8(__A,__B); } __m512i test_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu8 - // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512 + // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epu8(__M,__A,__B); } __m512i test_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu8 - // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512 + // CHECK: [[CMP:%.*]] = icmp ugt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epu8(__W,__M,__A,__B); } __m512i test_mm512_max_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_max_epu16 - // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512 + // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] return _mm512_max_epu16(__A,__B); } __m512i test_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu16 - // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512 + // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epu16(__M,__A,__B); } __m512i test_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu16 - // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512 + // CHECK: [[CMP:%.*]] = icmp ugt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epu16(__W,__M,__A,__B); } __m512i test_mm512_min_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi8 - // CHECK: @llvm.x86.avx512.mask.pmins.b.512 + // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] return _mm512_min_epi8(__A,__B); } __m512i test_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi8 - // CHECK: @llvm.x86.avx512.mask.pmins.b.512 + // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_min_epi8(__M,__A,__B); } __m512i test_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi8 - // CHECK: @llvm.x86.avx512.mask.pmins.b.512 + // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_min_epi8(__W,__M,__A,__B); } __m512i test_mm512_min_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epi16 - // CHECK: @llvm.x86.avx512.mask.pmins.w.512 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] return _mm512_min_epi16(__A,__B); } __m512i test_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi16 - // CHECK: @llvm.x86.avx512.mask.pmins.w.512 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epi16(__M,__A,__B); } __m512i test_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi16 - // CHECK: @llvm.x86.avx512.mask.pmins.w.512 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epi16(__W,__M,__A,__B); } __m512i test_mm512_min_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu8 - // CHECK: @llvm.x86.avx512.mask.pminu.b.512 + // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] return _mm512_min_epu8(__A,__B); } __m512i test_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu8 - // CHECK: @llvm.x86.avx512.mask.pminu.b.512 + // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_min_epu8(__M,__A,__B); } __m512i test_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu8 - // CHECK: @llvm.x86.avx512.mask.pminu.b.512 + // CHECK: [[CMP:%.*]] = icmp ult <64 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[X]], <64 x i8> [[Y]] + // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_min_epu8(__W,__M,__A,__B); } __m512i test_mm512_min_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_min_epu16 - // CHECK: @llvm.x86.avx512.mask.pminu.w.512 + // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] return _mm512_min_epu16(__A,__B); } __m512i test_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu16 - // CHECK: @llvm.x86.avx512.mask.pminu.w.512 + // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epu16(__M,__A,__B); } __m512i test_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu16 - // CHECK: @llvm.x86.avx512.mask.pminu.w.512 + // CHECK: [[CMP:%.*]] = icmp ult <32 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[X]], <32 x i16> [[Y]] + // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epu16(__W,__M,__A,__B); } __m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) { diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index bb85cf9..2858f98 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -7393,115 +7393,211 @@ __m512d test_mm512_roundscale_round_pd(__m512d __A) return _mm512_roundscale_round_pd(__A,3,_MM_FROUND_CUR_DIRECTION); } +__m512i test_mm512_max_epi32 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_max_epi32 + // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + return _mm512_max_epi32 (__A,__B); +} + __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi32 - // CHECK: @llvm.x86.avx512.mask.pmaxs.d.512 + // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_max_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi32 - // CHECK: @llvm.x86.avx512.mask.pmaxs.d.512 + // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_max_epi32 (__M,__A,__B); } +__m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_max_epi64 + // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + return _mm512_max_epi64 (__A,__B); +} + __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.512 + // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_max_epi64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.512 + // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_max_epi64 (__M,__A,__B); } +__m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_max_epu64 + // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + return _mm512_max_epu64 (__A,__B); +} + __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.512 + // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_max_epu64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.512 + // CHECK: [[CMP:%.*]] = icmp ugt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_max_epu64 (__M,__A,__B); } +__m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_max_epu32 + // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + return _mm512_max_epu32 (__A,__B); +} + __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_max_epu32 - // CHECK: @llvm.x86.avx512.mask.pmaxu.d.512 + // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_max_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_max_epu32 - // CHECK: @llvm.x86.avx512.mask.pmaxu.d.512 + // CHECK: [[CMP:%.*]] = icmp ugt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_max_epu32 (__M,__A,__B); } +__m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_min_epi32 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + return _mm512_min_epi32 (__A,__B); +} + __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi32 - // CHECK: @llvm.x86.avx512.mask.pmins.d.512 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_min_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi32 - // CHECK: @llvm.x86.avx512.mask.pmins.d.512 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_min_epi32 (__M,__A,__B); } +__m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_min_epu32 + // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + return _mm512_min_epu32 (__A,__B); +} + __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu32 - // CHECK: @llvm.x86.avx512.mask.pminu.d.512 + // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_mask_min_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu32 - // CHECK: @llvm.x86.avx512.mask.pminu.d.512 + // CHECK: [[CMP:%.*]] = icmp ult <16 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[X]], <16 x i32> [[Y]] + // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} return _mm512_maskz_min_epu32 (__M,__A,__B); } +__m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_min_epi64 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + return _mm512_min_epi64 (__A,__B); +} + __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.512 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_min_epi64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.512 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_min_epi64 (__M,__A,__B); } +__m512i test_mm512_min_epu64 (__m512i __A, __m512i __B) +{ + // CHECK-LABEL: @test_mm512_min_epu64 + // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + return _mm512_min_epu64 (__A,__B); +} + __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.512 + // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_mask_min_epu64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.512 + // CHECK: [[CMP:%.*]] = icmp ult <8 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[X]], <8 x i64> [[Y]] + // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} return _mm512_maskz_min_epu64 (__M,__A,__B); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 0bbbe95..3eea49e 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -2547,32 +2547,42 @@ __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128 + // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epi64(__M,__A,__B); } __m128i test_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128 + // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epi64(__W,__M,__A,__B); } __m128i test_mm_max_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128 + // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] return _mm_max_epi64(__A,__B); } __m256i test_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256 + // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epi64(__M,__A,__B); } __m256i test_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256 + // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epi64(__W,__M,__A,__B); } __m256i test_mm256_max_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_max_epi64 - // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256 + // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] return _mm256_max_epi64(__A,__B); } __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { @@ -2605,32 +2615,42 @@ __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128 + // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epu64(__M,__A,__B); } __m128i test_mm_max_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128 + // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] return _mm_max_epu64(__A,__B); } __m128i test_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128 + // CHECK: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256 + // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epu64(__M,__A,__B); } __m256i test_mm256_max_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256 + // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] return _mm256_max_epu64(__A,__B); } __m256i test_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_max_epu64 - // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256 + // CHECK: [[CMP:%.*]] = icmp ugt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { @@ -2663,32 +2683,42 @@ __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_min_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.128 + // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] return _mm_min_epi64(__A,__B); } __m128i test_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.128 + // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epi64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.128 + // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epi64(__M,__A,__B); } __m256i test_mm256_min_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.256 + // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] return _mm256_min_epi64(__A,__B); } __m256i test_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.256 + // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_min_epi64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epi64 - // CHECK: @llvm.x86.avx512.mask.pmins.q.256 + // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epi64(__M,__A,__B); } __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { @@ -2721,32 +2751,42 @@ __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 } __m128i test_mm_min_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.128 + // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] return _mm_min_epu64(__A,__B); } __m128i test_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.128 + // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epu64(__W,__M,__A,__B); } __m128i test_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.128 + // CHECK: [[CMP:%.*]] = icmp ult <2 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[X]], <2 x i64> [[Y]] + // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epu64(__M,__A,__B); } __m256i test_mm256_min_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.256 + // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] return _mm256_min_epu64(__A,__B); } __m256i test_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.256 + // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_min_epu64(__W,__M,__A,__B); } __m256i test_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_min_epu64 - // CHECK: @llvm.x86.avx512.mask.pminu.q.256 + // CHECK: [[CMP:%.*]] = icmp ult <4 x i64> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[X]], <4 x i64> [[Y]] + // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epu64(__M,__A,__B); } __m128d test_mm_roundscale_pd(__m128d __A) { -- 2.7.4