From f8bfe21fad8f4c1ca41e583ef6fd7aae5b17e95e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 18 Jul 2014 13:07:05 +0000 Subject: [PATCH] AArch64: implement efficient f16 bitcasts Because i16 is illegal, there's no native DAG method to represent a bitcast to or from an f16 type. This meant LLVM was inserting a stack store/load pair which is really not ideal. llvm-svn: 213378 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 43 +++++++++++++++++++++++++ llvm/test/CodeGen/AArch64/half.ll | 21 ++++++++++++ 2 files changed, 64 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9e64ea8..4921826 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -317,6 +317,10 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f16, Expand); + + setOperationAction(ISD::BITCAST, MVT::i16, Custom); + setOperationAction(ISD::BITCAST, MVT::f16, Custom); + // Indexed loads and stores are supported. 
for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -1510,12 +1514,30 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, return CallResult.first; } +static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) { + if (Op.getValueType() != MVT::f16) + return SDValue(); + + assert(Op.getOperand(0).getValueType() == MVT::i16); + SDLoc DL(Op); + + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); + Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); + return SDValue( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, + DAG.getTargetConstant(AArch64::hsub, MVT::i32)), + 0); +} + + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); return SDValue(); + case ISD::BITCAST: + return LowerBITCAST(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: @@ -7942,11 +7964,32 @@ bool AArch64TargetLowering::getPostIndexedAddressParts( return true; } +static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + if (N->getValueType(0) != MVT::i16) + return; + + SDLoc DL(N); + SDValue Op = N->getOperand(0); + assert(Op.getValueType() == MVT::f16 && + "Inconsistent bitcast?
Only 16-bit types should be i16 or f16"); + Op = SDValue( + DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32, + DAG.getUNDEF(MVT::i32), Op, + DAG.getTargetConstant(AArch64::hsub, MVT::i32)), + 0); + Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op)); +} + void AArch64TargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this"); + case ISD::BITCAST: + ReplaceBITCASTResults(N, Results, DAG); + return; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion"); diff --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll index 1ad255b..a46094b 100644 --- a/llvm/test/CodeGen/AArch64/half.ll +++ b/llvm/test/CodeGen/AArch64/half.ll @@ -17,6 +17,16 @@ define i16 @test_bitcast_from_half(half* %addr) { ret i16 %val_int } +define i16 @test_reg_bitcast_from_half(half %in) { +; CHECK-LABEL: test_reg_bitcast_from_half: +; CHECK-NOT: str +; CHECK-NOT: ldr +; CHECK-DAG: fmov w0, s0 +; CHECK: ret + %val = bitcast half %in to i16 + ret i16 %val +} + define void @test_bitcast_to_half(half* %addr, i16 %in) { ; CHECK-LABEL: test_bitcast_to_half: ; CHECK: strh w1, [x0] @@ -25,6 +35,17 @@ define void @test_bitcast_to_half(half* %addr, i16 %in) { ret void } +define half @test_reg_bitcast_to_half(i16 %in) { +; CHECK-LABEL: test_reg_bitcast_to_half: +; CHECK-NOT: str +; CHECK-NOT: ldr +; CHECK-DAG: fmov s0, w0 +; CHECK: ret + + %val = bitcast i16 %in to half + ret half %val +} + define float @test_extend32(half* %addr) { ; CHECK-LABEL: test_extend32: ; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}} -- 2.7.4