From: Eric Christopher Date: Wed, 29 Jul 2009 00:28:05 +0000 (+0000) Subject: Add support for gcc __builtin_ia32_ptest{z,c,nzc} intrinsics. Lower X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f7802a33ce06554c2eb6959b699e35f4818ac1df;p=platform%2Fupstream%2Fllvm.git Add support for gcc __builtin_ia32_ptest{z,c,nzc} intrinsics. Lower to ptest instruction plus setcc. Revamp ptest instruction. Add test. llvm-svn: 77407 --- diff --git a/llvm/include/llvm/IntrinsicsX86.td b/llvm/include/llvm/IntrinsicsX86.td index 3d43183..11cfe97 100644 --- a/llvm/include/llvm/IntrinsicsX86.td +++ b/llvm/include/llvm/IntrinsicsX86.td @@ -864,6 +864,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } +// Test instruction with bitwise comparison. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +} //===----------------------------------------------------------------------===// // MMX diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2322814..796f073 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6200,6 +6200,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // ptest intrinsics. The intrinsic these come from are designed to return + // a boolean value, not just an instruction so lower it to the ptest + // pattern and a conditional move to the result. + case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_sse41_ptestnzc:{ + unsigned X86CC = 0; + switch (IntNo) { + default: break; + case Intrinsic::x86_sse41_ptestz: + // ZF = 1 + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse41_ptestc: + // CF = 1 + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse41_ptestnzc: + // ZF and CF = 0 + X86CC = X86::COND_A; + break; + } + + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } // Fix vector shift instructions where the last operand is a non-immediate // i32 value. @@ -7048,6 +7078,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INC: return "X86ISD::INC"; case X86ISD::DEC: return "X86ISD::DEC"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; + case X86ISD::PTEST: return "X86ISD::PTEST"; } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 83939cb..c3da894 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -244,7 +244,10 @@ namespace llvm { INC, DEC, // MUL_IMM - X86 specific multiply by immediate. - MUL_IMM + MUL_IMM, + + // PTEST - Vector bitwise comparisons + PTEST }; } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index eb26ac0..853f88e 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -69,6 +69,9 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; +def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -3618,11 +3621,17 @@ defm INSERTPS : SS41I_insertf32<0x21, "insertps">; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; +// ptest instruction we'll lower to this in X86ISelLowering primarily from +// the intel intrinsic that corresponds to this. let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, VR128:$src2), + (implicit EFLAGS)]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, (load addr:$src2)), + (implicit EFLAGS)]>, OpSize; } def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 6c093a8..19a586f 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -181,4 +181,19 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: _insertps_3: ; X64: insertps $0, %xmm1, %xmm0 -} \ No newline at end of file +} + +define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone + ret i32 %tmp1 +; X32: _ptestz_1: +; X32: ptest %xmm1, %xmm0 +; X32: sete %al + +; X64: _ptestz_1: +; X64: ptest %xmm1, %xmm0 +; X64: sete %al +} + +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone +