From: Simon Pilgrim
Date: Sun, 20 Jan 2019 19:27:40 +0000 (+0000)
Subject: [X86] Auto upgrade VPCOM/VPCOMU intrinsics to generic integer comparisons
X-Git-Tag: llvmorg-10-init~14107
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e1143c1322e5f2eec6ca485553dddfad338ea412;p=platform%2Fupstream%2Fllvm.git

[X86] Auto upgrade VPCOM/VPCOMU intrinsics to generic integer comparisons

This causes a couple of changes in the upgrade tests, as signed/unsigned eq/ne are
equivalent and we constant fold the true/false codes; these changes are the same as
what we already do for the avx512 cmp/ucmp intrinsics.

Noticed while cleaning up vector integer comparison costs for PR40376.

llvm-svn: 351697
---

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index ea0b03b..3d41d23 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1908,31 +1908,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
               Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
 
-  def int_x86_xop_vpcomb :
-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomw :
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomd :
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomq :
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomub :
-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomuw :
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomud :
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomuq :
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-
   def int_x86_xop_vphaddbd :
               GCCBuiltin<"__builtin_ia32_vphaddbd">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b28e9f5..e27d93e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -361,8 +361,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name == "xop.vpcmov.256" || // Added in 5.0
       Name.startswith("avx512.mask.move.s") || // Added in 4.0
       Name.startswith("avx512.cvtmask2") || // Added in 5.0
-      (Name.startswith("xop.vpcom") && // Added in 3.2
-       F->arg_size() == 2) ||
+      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
       Name.startswith("xop.vprot") || // Added in 8.0
       Name.startswith("avx512.prol") || // Added in 8.0
       Name.startswith("avx512.pror") || // Added in 8.0
@@ -2038,26 +2037,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       else
         llvm_unreachable("Unknown suffix");
 
-      Name = Name.substr(9); // strip off "xop.vpcom"
       unsigned Imm;
-      if (Name.startswith("lt"))
-        Imm = 0;
-      else if (Name.startswith("le"))
-        Imm = 1;
-      else if (Name.startswith("gt"))
-        Imm = 2;
-      else if (Name.startswith("ge"))
-        Imm = 3;
-      else if (Name.startswith("eq"))
-        Imm = 4;
-      else if (Name.startswith("ne"))
-        Imm = 5;
-      else if (Name.startswith("false"))
-        Imm = 6;
-      else if (Name.startswith("true"))
-        Imm = 7;
-      else
-        llvm_unreachable("Unknown condition");
+      if (CI->getNumArgOperands() == 3) {
+        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+      } else {
+        Name = Name.substr(9); // strip off "xop.vpcom"
+        if (Name.startswith("lt"))
+          Imm = 0;
+        else if (Name.startswith("le"))
+          Imm = 1;
+        else if (Name.startswith("gt"))
+          Imm = 2;
+        else if (Name.startswith("ge"))
+          Imm = 3;
+        else if (Name.startswith("eq"))
+          Imm = 4;
+        else if (Name.startswith("ne"))
+          Imm = 5;
+        else if (Name.startswith("false"))
+          Imm = 6;
+        else if (Name.startswith("true"))
+          Imm = 7;
+        else
+          llvm_unreachable("Unknown condition");
+      }
+
       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
       Value *Sel = CI->getArgOperand(2);
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 01a56a4..b032889 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1205,14 +1205,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
                      X86ISD::GF2P8MULB, 0),
-  X86_INTRINSIC_DATA(xop_vpcomb,         INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomd,         INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomq,         INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomub,        INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomud,        INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomuq,        INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomuw,        INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomw,         INTR_TYPE_3OP, X86ISD::VPCOM, 0),
   X86_INTRINSIC_DATA(xop_vpermil2pd,     INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
   X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
   X86_INTRINSIC_DATA(xop_vpermil2ps,     INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 50cad61..e8b0d52 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1133,45 +1133,6 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
 }
 
-/// Decode XOP integer vector comparison intrinsics.
-static Value *simplifyX86vpcom(const IntrinsicInst &II,
-                               InstCombiner::BuilderTy &Builder,
-                               bool IsSigned) {
-  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
-    uint64_t Imm = CInt->getZExtValue() & 0x7;
-    VectorType *VecTy = cast<VectorType>(II.getType());
-    CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-
-    switch (Imm) {
-    case 0x0:
-      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
-      break;
-    case 0x1:
-      Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-      break;
-    case 0x2:
-      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
-      break;
-    case 0x3:
-      Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
-      break;
-    case 0x4:
-      Pred = ICmpInst::ICMP_EQ; break;
-    case 0x5:
-      Pred = ICmpInst::ICMP_NE; break;
-    case 0x6:
-      return ConstantInt::getSigned(VecTy, 0); // FALSE
-    case 0x7:
-      return ConstantInt::getSigned(VecTy, -1); // TRUE
-    }
-
-    if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
-                                        II.getArgOperand(1)))
-      return Builder.CreateSExtOrTrunc(Cmp, VecTy);
-  }
-  return nullptr;
-}
-
 static bool maskIsAllOneOrUndef(Value *Mask) {
   auto *ConstMask = dyn_cast<Constant>(Mask);
   if (!ConstMask)
@@ -3167,22 +3128,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return nullptr;
     break;
 
-  case Intrinsic::x86_xop_vpcomb:
-  case Intrinsic::x86_xop_vpcomd:
-  case Intrinsic::x86_xop_vpcomq:
-  case Intrinsic::x86_xop_vpcomw:
-    if (Value *V = simplifyX86vpcom(*II, Builder, true))
-      return replaceInstUsesWith(*II, V);
-    break;
-
-  case Intrinsic::x86_xop_vpcomub:
-  case Intrinsic::x86_xop_vpcomud:
-  case Intrinsic::x86_xop_vpcomuq:
-  case Intrinsic::x86_xop_vpcomuw:
-    if (Value *V = simplifyX86vpcom(*II, Builder, false))
-      return replaceInstUsesWith(*II, V);
-    break;
-
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
diff --git a/llvm/test/CodeGen/X86/commute-xop.ll b/llvm/test/CodeGen/X86/commute-xop.ll
index 606ff12..789afbb 100644
--- a/llvm/test/CodeGen/X86/commute-xop.ll
+++ b/llvm/test/CodeGen/X86/commute-xop.ll
@@ -74,12 +74,12 @@ define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
 ; X32-LABEL: commute_fold_vpcomud:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomequd (%eax), %xmm0, %xmm0
+; X32-NEXT:    vpcomeqd (%eax), %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomud:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpcomequd (%rdi), %xmm0, %xmm0
+; X64-NEXT:    vpcomeqd (%rdi), %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
@@ -91,12 +91,12 @@ define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
 ; X32-LABEL: commute_fold_vpcomuq:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomnequq (%eax), %xmm0, %xmm0
+; X32-NEXT:    vpcomneqq (%eax), %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomuq:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpcomnequq (%rdi), %xmm0, %xmm0
+; X64-NEXT:    vpcomneqq (%rdi), %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
@@ -107,13 +107,12 @@ declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readn
 define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
 ; X32-LABEL: commute_fold_vpcomuw:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomfalseuw (%eax), %xmm0, %xmm0
+; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomuw:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpcomfalseuw (%rdi), %xmm0, %xmm0
+; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
@@ -124,13 +123,12 @@ declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readn
 define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
 ; X32-LABEL: commute_fold_vpcomw:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomtruew (%eax), %xmm0, %xmm0
+; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomw:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpcomtruew (%rdi), %xmm0, %xmm0
+; X64-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
index a9f237a..3d25e41 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
@@ -726,6 +726,86 @@ define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
 }
 declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
 
+define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomub:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomuw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomud:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: test_int_x86_xop_vpcomuq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
 define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
 ; CHECK-LABEL: test_int_x86_xop_vpcmov:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index 80a3c5b..50e0578 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -663,84 +663,3 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
   ret <8 x float> %res
 }
 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
-
-define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomb:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
-
-define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
-
-define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
-
-define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
-
-define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomub:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
-
-define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomuw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
-
-define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomud:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
-
-define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
-; CHECK-LABEL: test_int_x86_xop_vpcomuq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
-
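
For illustration only, a minimal IR sketch of what the new upgrade path is expected to do (the function name @upgrade_example and the value names are invented for this example; the pre-upgrade call mirrors the test_int_x86_xop_vpcomb case added above):

declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8)

define <16 x i8> @upgrade_example(<16 x i8> %a0, <16 x i8> %a1) {
  ; Old-style call with the comparison code carried as an immediate (0 = "lt", signed).
  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0)
  ret <16 x i8> %res
}

; Expected shape after auto-upgrade (a sketch of the generic comparison form):
;   %cmp = icmp slt <16 x i8> %a0, %a1
;   %res = sext <16 x i1> %cmp to <16 x i8>
; Codes 6 ("false") and 7 ("true") are constant folded instead, which is why the
; commute-xop.ll checks above change to vxorps (all zeros) and vpcmpeqd (all ones).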