From f236bb1b5bea26acbcb39f8232e64c638904bc82 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 3 Jul 2014 15:06:47 +0000 Subject: [PATCH] Fix ppcf128 component access on little-endian systems The PowerPC 128-bit long double data type (ppcf128 in LLVM) is in fact a pair of two doubles, where one is considered the "high" or more-significant part, and the other is considered the "low" or less-significant part. When a ppcf128 value is stored in memory or a register pair, the high part always comes first, i.e. at the lower memory address or in the lower-numbered register, and the low part always comes second. This is true both on big-endian and little-endian PowerPC systems. (Similar to how with a complex number, the real part always comes first and the imaginary part second, no matter the byte order of the system.) This was implemented incorrectly for little-endian systems in LLVM. This commit fixes three related issues: - When printing an immediate ppcf128 constant to assembler output in emitGlobalConstantFP, emit the high part first on both big- and little-endian systems. - When lowering a ppcf128 type to a pair of f64 types in SelectionDAG (which is used e.g. when generating code to load an argument into a register pair), use correct low/high part ordering on little-endian systems. - In a related issue, because lowering ppcf128 into a pair of f64 must operate differently from lowering an int128 into a pair of i64, bitcasts between ppcf128 and int128 must not be optimized away by the DAG combiner on little-endian systems, but must effect a word-swap. Reviewed by Hal Finkel. 
llvm-svn: 212274 --- llvm/include/llvm/Target/TargetLowering.h | 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 + .../CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 22 +-- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +- llvm/test/CodeGen/PowerPC/ppcf128-endian.ll | 154 +++++++++++++++++++++ llvm/test/CodeGen/X86/float-asmprint.ll | 5 +- 7 files changed, 183 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/ppcf128-endian.ll diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 2695bc5..7caa55d 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -716,6 +716,13 @@ public: /// reduce runtime. virtual bool ShouldShrinkFPConstant(EVT) const { return true; } + /// When splitting a value of the specified type into parts, does the Lo + /// or Hi part come first? This usually follows the endianness, except + /// for ppcf128, where the Hi part always comes first. + bool hasBigEndianPartOrdering(EVT VT) const { + return isBigEndian() || VT == MVT::ppcf128; + } + /// If true, the target has custom DAG combine transformations that it can /// perform for the specified node. bool hasTargetDAGCombine(ISD::NodeType NT) const { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5a809aa..f80fdea 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1881,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. 
- if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + if (AP.TM.getDataLayout()->isBigEndian() && + !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0f50184..7198203 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6210,6 +6210,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType()) == + TLI.hasBigEndianPartOrdering(VT) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f40ed76..7e2f7b6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT) != + TLI.hasBigEndianPartOrdering(OutVT)) + std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); } @@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); @@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. 
- if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), - St->getValue().getValueType()); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); @@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 249683f..28d8e98 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll 
b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll new file mode 100644 index 0000000..2a5f13a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll @@ -0,0 +1,154 @@ +; RUN: llc -mcpu=pwr7 -mattr=+altivec < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +@g = common global ppc_fp128 0xM00000000000000000000000000000000, align 16 + +define void @callee(ppc_fp128 %x) { +entry: + %x.addr = alloca ppc_fp128, align 16 + store ppc_fp128 %x, ppc_fp128* %x.addr, align 16 + %0 = load ppc_fp128* %x.addr, align 16 + store ppc_fp128 %0, ppc_fp128* @g, align 16 + ret void +} +; CHECK: @callee +; CHECK: ld [[REG:[0-9]+]], .LC +; CHECK: stfd 2, 8([[REG]]) +; CHECK: stfd 1, 0([[REG]]) +; CHECK: blr + +define void @caller() { +entry: + %0 = load ppc_fp128* @g, align 16 + call void @test(ppc_fp128 %0) + ret void +} +; CHECK: @caller +; CHECK: ld [[REG:[0-9]+]], .LC +; CHECK: lfd 2, 8([[REG]]) +; CHECK: lfd 1, 0([[REG]]) +; CHECK: bl test + +declare void @test(ppc_fp128) + +define void @caller_const() { +entry: + call void @test(ppc_fp128 0xM3FF00000000000000000000000000000) + ret void +} +; CHECK: .LCPI[[LC:[0-9]+]]_0: +; CHECK: .long 1065353216 +; CHECK: .LCPI[[LC]]_1: +; CHECK: .long 0 +; CHECK: @caller_const +; CHECK: addi [[REG0:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_0 +; CHECK: addi [[REG1:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_1 +; CHECK: lfs 1, 0([[REG0]]) +; CHECK: lfs 2, 0([[REG1]]) +; CHECK: bl test + +define ppc_fp128 @result() { +entry: + %0 = load ppc_fp128* @g, align 16 + ret ppc_fp128 %0 +} +; CHECK: @result +; CHECK: ld [[REG:[0-9]+]], .LC +; CHECK: lfd 1, 0([[REG]]) +; CHECK: lfd 2, 8([[REG]]) +; CHECK: blr + +define void @use_result() { +entry: + %call = tail call ppc_fp128 @test_result() #3 + store ppc_fp128 %call, ppc_fp128* @g, align 16 + ret void +} +; CHECK: @use_result +; CHECK: bl test_result +; CHECK: ld [[REG:[0-9]+]], .LC +; CHECK: stfd 2, 8([[REG]]) +; CHECK: stfd 1, 0([[REG]]) +; CHECK: blr + 
+declare ppc_fp128 @test_result() + +define void @caller_result() { +entry: + %call = tail call ppc_fp128 @test_result() + tail call void @test(ppc_fp128 %call) + ret void +} +; CHECK: @caller_result +; CHECK: bl test_result +; CHECK-NEXT: nop +; CHECK-NEXT: bl test +; CHECK-NEXT: nop + +define i128 @convert_from(ppc_fp128 %x) { +entry: + %0 = bitcast ppc_fp128 %x to i128 + ret i128 %0 +} +; CHECK: @convert_from +; CHECK: stfd 1, [[OFF1:.*]](1) +; CHECK: stfd 2, [[OFF2:.*]](1) +; CHECK: ld 3, [[OFF1]](1) +; CHECK: ld 4, [[OFF2]](1) +; CHECK: blr + +define ppc_fp128 @convert_to(i128 %x) { +entry: + %0 = bitcast i128 %x to ppc_fp128 + ret ppc_fp128 %0 +} +; CHECK: @convert_to +; CHECK: std 3, [[OFF1:.*]](1) +; CHECK: std 4, [[OFF2:.*]](1) +; CHECK: lfd 1, [[OFF1]](1) +; CHECK: lfd 2, [[OFF2]](1) +; CHECK: blr + +define ppc_fp128 @convert_to2(i128 %x) { +entry: + %shl = shl i128 %x, 1 + %0 = bitcast i128 %shl to ppc_fp128 + ret ppc_fp128 %0 +} + +; CHECK: @convert_to2 +; CHECK: std 3, [[OFF1:.*]](1) +; CHECK: std 4, [[OFF2:.*]](1) +; CHECK: lfd 1, [[OFF1]](1) +; CHECK: lfd 2, [[OFF2]](1) +; CHECK: blr + +define double @convert_vector(<4 x i32> %x) { +entry: + %cast = bitcast <4 x i32> %x to ppc_fp128 + %conv = fptrunc ppc_fp128 %cast to double + ret double %conv +} +; CHECK: @convert_vector +; CHECK: addi [[REG:[0-9]+]], 1, [[OFF:.*]] +; CHECK: stvx 2, 0, [[REG]] +; CHECK: lfd 1, [[OFF]](1) +; CHECK: blr + +declare void @llvm.va_start(i8*) + +define double @vararg(i32 %a, ...) 
{ +entry: + %va = alloca i8*, align 8 + %va1 = bitcast i8** %va to i8* + call void @llvm.va_start(i8* %va1) + %arg = va_arg i8** %va, ppc_fp128 + %conv = fptrunc ppc_fp128 %arg to double + ret double %conv +} +; CHECK: @vararg +; CHECK: lfd 1, 0({{[0-9]+}}) +; CHECK: blr + diff --git a/llvm/test/CodeGen/X86/float-asmprint.ll b/llvm/test/CodeGen/X86/float-asmprint.ll index 4aeae7f..5de9700 100644 --- a/llvm/test/CodeGen/X86/float-asmprint.ll +++ b/llvm/test/CodeGen/X86/float-asmprint.ll @@ -16,8 +16,9 @@ ; CHECK-NEXT: .size ; CHECK: varppc128: -; CHECK-NEXT: .quad 0 # ppc_fp128 -0 -; CHECK-NEXT: .quad -9223372036854775808 +; For ppc_fp128, the high double always comes first. +; CHECK-NEXT: .quad -9223372036854775808 # ppc_fp128 -0 +; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .size ; CHECK: var80: -- 2.7.4