From: Joey Gouly Date: Fri, 19 Jul 2013 16:34:16 +0000 (+0000) Subject: [ARMv8] Implement the NEON instructions VRINT{N, X, A, Z, M, P}. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e8602558501771f4c47d176c45150a203d7a053e;p=platform%2Fupstream%2Fllvm.git [ARMv8] Implement the NEON instructions VRINT{N, X, A, Z, M, P}. llvm-svn: 186688 --- diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 7849e94..3c5d5ff 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -364,6 +364,14 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic; def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic; def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic; +// Vector Rounding +def int_arm_neon_vrintn : Neon_1Arg_Intrinsic; +def int_arm_neon_vrintx : Neon_1Arg_Intrinsic; +def int_arm_neon_vrinta : Neon_1Arg_Intrinsic; +def int_arm_neon_vrintz : Neon_1Arg_Intrinsic; +def int_arm_neon_vrintm : Neon_1Arg_Intrinsic; +def int_arm_neon_vrintp : Neon_1Arg_Intrinsic; + // De-interleaving vector loads from N-element structures. // Source operands are the address and alignment. 
def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty], diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 0836700..3e2ab06 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5738,6 +5738,34 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : InstAlias(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f114b7a..7061ede 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -263,6 +263,8 @@ class ARMAsmParser : public MCTargetAsmParser { const SmallVectorImpl &Ops); bool shouldOmitCCOutOperand(StringRef Mnemonic, SmallVectorImpl &Operands); + bool 
shouldOmitPredicateOperand(StringRef Mnemonic,
+                              SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
 public:
   enum ARMMatchResultTy {
@@ -5157,6 +5159,37 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   return false;
 }
 
+/// Decide whether the predicate operand parsed for \p Mnemonic should be
+/// removed from \p Operands.
+///
+/// Returns true only when the mnemonic is "vrintz", "vrintx" or "vrintr", the
+/// data type token is ".f32", and the operand that follows the type token(s)
+/// is a register in the DPR or QPR register class. Returns false otherwise.
+bool ARMAsmParser::shouldOmitPredicateOperand(
+    StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
+  unsigned RegIdx = 3;
+  if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
+      static_cast<ARMOperand*>(Operands[2])->getToken() == ".f32") {
+    // A second ".f32" token (the ".f32.f32" spelling) moves the operand to
+    // inspect from index 3 to index 4.
+    if (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+        static_cast<ARMOperand*>(Operands[3])->getToken() == ".f32")
+      RegIdx = 4;
+
+    if (static_cast<ARMOperand*>(Operands[RegIdx])->isReg() &&
+        (ARMMCRegisterClasses[ARM::DPRRegClassID]
+             .contains(static_cast<ARMOperand*>(Operands[RegIdx])->getReg()) ||
+         ARMMCRegisterClasses[ARM::QPRRegClassID]
+             .contains(static_cast<ARMOperand*>(Operands[RegIdx])->getReg())))
+      return true;
+  }
+
+  // All other cases keep the predicate operand. This explicit return also
+  // prevents control from flowing off the end of a non-void function.
+  return false;
+}
+
 static bool isDataTypeToken(StringRef Tok) {
   return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
     Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -5359,6 +5392,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     delete Op;
   }
 
+  // Some instructions have the same mnemonic, but don't always
+  // have a predicate. Distinguish them here and delete the
+  // predicate if needed.
+  if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+    Operands.erase(Operands.begin() + 1);
+    delete Op;
+  }
+
   // ARM mode 'blx' need special handling, as the register operand version
   // is predicable, but the label operand version is not.
So, we can't rely // on the Mnemonic based checking to correctly figure out when to put diff --git a/llvm/test/MC/ARM/neon-v8.s b/llvm/test/MC/ARM/neon-v8.s index 06a22f7..429c8e3 100644 --- a/llvm/test/MC/ARM/neon-v8.s +++ b/llvm/test/MC/ARM/neon-v8.s @@ -44,3 +44,40 @@ vcvtp.s32.f32 q4, q15 @ CHECK: vcvtp.s32.f32 q4, q15 @ encoding: [0x6e,0x82,0xbb,0xf3] vcvtp.u32.f32 q9, q8 @ CHECK: vcvtp.u32.f32 q9, q8 @ encoding: [0xe0,0x22,0xfb,0xf3] + +vrintn.f32 d3, d0 +@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3] +vrintn.f32 q1, q4 +@ CHECK: vrintn.f32 q1, q4 @ encoding: [0x48,0x24,0xba,0xf3] +vrintx.f32 d5, d12 +@ CHECK: vrintx.f32 d5, d12 @ encoding: [0x8c,0x54,0xba,0xf3] +vrintx.f32 q0, q3 +@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3] +vrinta.f32 d3, d0 +@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3] +vrinta.f32 q8, q2 +@ CHECK: vrinta.f32 q8, q2 @ encoding: [0x44,0x05,0xfa,0xf3] +vrintz.f32 d12, d18 +@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xa2,0xc5,0xba,0xf3] +vrintz.f32 q9, q4 +@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3] +vrintm.f32 d3, d0 +@ CHECK: vrintm.f32 d3, d0 @ encoding: [0x80,0x36,0xba,0xf3] +vrintm.f32 q1, q4 +@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xc8,0x26,0xba,0xf3] +vrintp.f32 d3, d0 +@ CHECK: vrintp.f32 d3, d0 @ encoding: [0x80,0x37,0xba,0xf3] +vrintp.f32 q1, q4 +@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3] + +@ test the aliases of vrint +vrintn.f32.f32 d3, d0 +@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3] +vrintx.f32.f32 q0, q3 +@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3] +vrinta.f32.f32 d3, d0 +@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3] +vrintz.f32.f32 q9, q4 +@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3] +vrintp.f32.f32 q1, q4 +@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3] diff --git a/llvm/test/MC/ARM/thumb-neon-v8.s b/llvm/test/MC/ARM/thumb-neon-v8.s index df40238..5b32781 100644 --- 
a/llvm/test/MC/ARM/thumb-neon-v8.s +++ b/llvm/test/MC/ARM/thumb-neon-v8.s @@ -44,3 +44,40 @@ vcvtp.s32.f32 q4, q15 @ CHECK: vcvtp.s32.f32 q4, q15 @ encoding: [0xbb,0xff,0x6e,0x82] vcvtp.u32.f32 q9, q8 @ CHECK: vcvtp.u32.f32 q9, q8 @ encoding: [0xfb,0xff,0xe0,0x22] + +vrintn.f32 d3, d0 +@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34] +vrintn.f32 q1, q4 +@ CHECK: vrintn.f32 q1, q4 @ encoding: [0xba,0xff,0x48,0x24] +vrintx.f32 d5, d12 +@ CHECK: vrintx.f32 d5, d12 @ encoding: [0xba,0xff,0x8c,0x54] +vrintx.f32 q0, q3 +@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04] +vrinta.f32 d3, d0 +@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35] +vrinta.f32 q8, q2 +@ CHECK: vrinta.f32 q8, q2 @ encoding: [0xfa,0xff,0x44,0x05] +vrintz.f32 d12, d18 +@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xba,0xff,0xa2,0xc5] +vrintz.f32 q9, q4 +@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25] +vrintm.f32 d3, d0 +@ CHECK: vrintm.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x36] +vrintm.f32 q1, q4 +@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x26] +vrintp.f32 d3, d0 +@ CHECK: vrintp.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x37] +vrintp.f32 q1, q4 +@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27] + +@ test the aliases of vrint +vrintn.f32.f32 d3, d0 +@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34] +vrintx.f32.f32 q0, q3 +@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04] +vrinta.f32.f32 d3, d0 +@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35] +vrintz.f32.f32 q9, q4 +@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25] +vrintp.f32.f32 q1, q4 +@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27] diff --git a/llvm/test/MC/Disassembler/ARM/neon-v8.txt b/llvm/test/MC/Disassembler/ARM/neon-v8.txt index 4e9bf3f7..8c6e689 100644 --- a/llvm/test/MC/Disassembler/ARM/neon-v8.txt +++ b/llvm/test/MC/Disassembler/ARM/neon-v8.txt @@ -44,3 +44,28 @@ # CHECK: vcvtp.s32.f32 q4, q15 0xe0 0x22 0xfb 0xf3 # 
CHECK: vcvtp.u32.f32 q9, q8 + +0x00 0x34 0xba 0xf3 +# CHECK: vrintn.f32 d3, d0 +0x48 0x24 0xba 0xf3 +# CHECK: vrintn.f32 q1, q4 +0x8c 0x54 0xba 0xf3 +# CHECK: vrintx.f32 d5, d12 +0xc6 0x04 0xba 0xf3 +# CHECK: vrintx.f32 q0, q3 +0x00 0x35 0xba 0xf3 +# CHECK: vrinta.f32 d3, d0 +0x44 0x05 0xfa 0xf3 +# CHECK: vrinta.f32 q8, q2 +0xa2 0xc5 0xba 0xf3 +# CHECK: vrintz.f32 d12, d18 +0xc8 0x25 0xfa 0xf3 +# CHECK: vrintz.f32 q9, q4 +0x80 0x36 0xba 0xf3 +# CHECK: vrintm.f32 d3, d0 +0xc8 0x26 0xba 0xf3 +# CHECK: vrintm.f32 q1, q4 +0x80 0x37 0xba 0xf3 +# CHECK: vrintp.f32 d3, d0 +0xc8 0x27 0xba 0xf3 +# CHECK: vrintp.f32 q1, q4 diff --git a/llvm/test/MC/Disassembler/ARM/thumb-neon-v8.txt b/llvm/test/MC/Disassembler/ARM/thumb-neon-v8.txt index f025b8b..27c09ea 100644 --- a/llvm/test/MC/Disassembler/ARM/thumb-neon-v8.txt +++ b/llvm/test/MC/Disassembler/ARM/thumb-neon-v8.txt @@ -44,3 +44,28 @@ # CHECK: vcvtp.s32.f32 q4, q15 0xfb 0xff 0xe0 0x22 # CHECK: vcvtp.u32.f32 q9, q8 + +0xba 0xff 0x00 0x34 +# CHECK: vrintn.f32 d3, d0 +0xba 0xff 0x48 0x24 +# CHECK: vrintn.f32 q1, q4 +0xba 0xff 0x8c 0x54 +# CHECK: vrintx.f32 d5, d12 +0xba 0xff 0xc6 0x04 +# CHECK: vrintx.f32 q0, q3 +0xba 0xff 0x00 0x35 +# CHECK: vrinta.f32 d3, d0 +0xfa 0xff 0x44 0x05 +# CHECK: vrinta.f32 q8, q2 +0xba 0xff 0xa2 0xc5 +# CHECK: vrintz.f32 d12, d18 +0xfa 0xff 0xc8 0x25 +# CHECK: vrintz.f32 q9, q4 +0xba 0xff 0x80 0x36 +# CHECK: vrintm.f32 d3, d0 +0xba 0xff 0xc8 0x26 +# CHECK: vrintm.f32 q1, q4 +0xba 0xff 0x80 0x37 +# CHECK: vrintp.f32 d3, d0 +0xba 0xff 0xc8 0x27 +# CHECK: vrintp.f32 q1, q4