From 740086596c93952113220ed0c2fadcce0fa44832 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 22 Sep 2021 11:44:55 -0500 Subject: [PATCH] [PowerPC] Fix issue with lowering byval parameters. Lowering of byval parameters with sizes that are not represented by a single store requires multiple stores to properly address the correct size of the parameter. Sizes that cannot be done with a single store are 3 bytes, 5 bytes, 6 bytes, 7 bytes. It is not correct to simply perform an 8 byte store for these elements because then the store would be larger than the element and alias analysis would assume that this is undefined behaviour and return NoAlias for them. This patch adds the correct stores so that the size of the store is not larger than the size of the element. Reviewed By: nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D108795 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 19 +- llvm/test/CodeGen/PowerPC/jaggedstructs.ll | 19 +- .../CodeGen/PowerPC/ppc64-byval-multi-store.ll | 228 ++++++++++++++------- llvm/test/CodeGen/PowerPC/structsinregs.ll | 24 ++- 4 files changed, 189 insertions(+), 101 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d9c9930..4f6d119 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4369,21 +4369,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store; - - if (ObjSize==1 || ObjSize==2 || ObjSize==4) { - EVT ObjType = (ObjSize == 1 ? MVT::i8 : - (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); - Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(&*FuncArg), ObjType); - } else { - // For sizes that don't fit a truncating store (3, 5, 6, 7), - // store the whole register as-is to the parameter save area - // slot. - Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(&*FuncArg)); - } - + EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8); + SDValue Store = + DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset diff --git a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll index 18ec014..f897f4a 100644 --- a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll @@ -18,10 +18,21 @@ entry: ret void } -; CHECK-DAG: std 3, 160(1) -; CHECK-DAG: std 6, 184(1) -; CHECK-DAG: std 5, 176(1) -; CHECK-DAG: std 4, 168(1) +; CHECK-LABEL: test +; CHECK: stb 6, 191(1) +; CHECK: rldicl 7, 6, 56, 8 +; CHECK: sth 7, 189(1) +; CHECK: rldicl 6, 6, 40, 24 +; CHECK: stw 6, 185(1) +; CHECK: sth 5, 182(1) +; CHECK: rldicl 5, 5, 48, 16 +; CHECK: stw 5, 178(1) +; CHECK: stb 4, 175(1) +; CHECK: rldicl 4, 4, 56, 8 +; CHECK: stw 4, 171(1) +; CHECK: stb 3, 167(1) +; CHECK: rldicl 3, 3, 56, 8 +; CHECK: sth 3, 165(1) ; CHECK-DAG: lbz {{[0-9]+}}, 167(1) ; CHECK-DAG: lhz {{[0-9]+}}, 165(1) ; CHECK-DAG: stb {{[0-9]+}}, 55(1) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll index 7bd6cf8..89cb3c2 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll @@ -244,8 +244,11 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P8LE-NEXT: mflr r0 ; P8LE-NEXT: std r0, 16(r1) ; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 48, 16 +; 
P8LE-NEXT: sth r3, 48(r1) +; P8LE-NEXT: stb r4, 50(r1) +; P8LE-NEXT: lhz r3, 48(r1) ; P8LE-NEXT: lbz r4, 50(r1) -; P8LE-NEXT: std r3, 48(r1) ; P8LE-NEXT: sth r3, 61(r1) ; P8LE-NEXT: addi r3, r1, 61 ; P8LE-NEXT: stb r4, 63(r1) @@ -262,8 +265,11 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P9LE-NEXT: mflr r0 ; P9LE-NEXT: std r0, 16(r1) ; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: sth r3, 48(r1) +; P9LE-NEXT: rldicl r3, r3, 48, 16 +; P9LE-NEXT: stb r3, 50(r1) +; P9LE-NEXT: lhz r3, 48(r1) ; P9LE-NEXT: lbz r4, 50(r1) -; P9LE-NEXT: std r3, 48(r1) ; P9LE-NEXT: sth r3, 61(r1) ; P9LE-NEXT: addi r3, r1, 61 ; P9LE-NEXT: stb r4, 63(r1) @@ -280,8 +286,11 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P10LE-NEXT: mflr r0 ; P10LE-NEXT: std r0, 16(r1) ; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: sth r3, 48(r1) +; P10LE-NEXT: rldicl r3, r3, 48, 16 +; P10LE-NEXT: stb r3, 50(r1) +; P10LE-NEXT: lhz r3, 48(r1) ; P10LE-NEXT: lbz r4, 50(r1) -; P10LE-NEXT: std r3, 48(r1) ; P10LE-NEXT: sth r3, 61(r1) ; P10LE-NEXT: addi r3, r1, 61 ; P10LE-NEXT: stb r4, 63(r1) @@ -297,11 +306,14 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: std r3, 176(r1) -; P8BE-NEXT: stb r3, 127(r1) +; P8BE-NEXT: rldicl r4, r3, 56, 8 +; P8BE-NEXT: stb r3, 183(r1) +; P8BE-NEXT: sth r4, 181(r1) +; P8BE-NEXT: lbz r4, 183(r1) +; P8BE-NEXT: lhz r3, 181(r1) +; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: sth r3, 125(r1) ; P8BE-NEXT: addi r3, r1, 125 -; P8BE-NEXT: lhz r4, 181(r1) -; P8BE-NEXT: sth r4, 125(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -315,11 +327,14 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: std r3, 176(r1) -; P9BE-NEXT: stb r3, 127(r1) +; P9BE-NEXT: rldicl 
r4, r3, 56, 8 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: sth r4, 181(r1) +; P9BE-NEXT: lbz r4, 183(r1) +; P9BE-NEXT: lhz r3, 181(r1) +; P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: sth r3, 125(r1) ; P9BE-NEXT: addi r3, r1, 125 -; P9BE-NEXT: lhz r4, 181(r1) -; P9BE-NEXT: sth r4, 125(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -333,11 +348,14 @@ define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: std r3, 176(r1) -; P10BE-NEXT: stb r3, 127(r1) +; P10BE-NEXT: rldicl r4, r3, 56, 8 +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: sth r4, 181(r1) +; P10BE-NEXT: lbz r4, 183(r1) +; P10BE-NEXT: lhz r3, 181(r1) +; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: sth r3, 125(r1) ; P10BE-NEXT: addi r3, r1, 125 -; P10BE-NEXT: lhz r4, 181(r1) -; P10BE-NEXT: sth r4, 125(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 @@ -492,10 +510,12 @@ define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) ; P8LE-NEXT: mflr r0 ; P8LE-NEXT: std r0, 16(r1) ; P8LE-NEXT: stdu r1, -64(r1) -; P8LE-NEXT: lbz r4, 52(r1) -; P8LE-NEXT: std r3, 48(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: stw r3, 48(r1) ; P8LE-NEXT: stw r3, 59(r1) ; P8LE-NEXT: addi r3, r1, 59 +; P8LE-NEXT: stb r4, 52(r1) +; P8LE-NEXT: lbz r4, 52(r1) ; P8LE-NEXT: stb r4, 63(r1) ; P8LE-NEXT: bl callee ; P8LE-NEXT: nop @@ -510,10 +530,12 @@ define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) ; P9LE-NEXT: mflr r0 ; P9LE-NEXT: std r0, 16(r1) ; P9LE-NEXT: stdu r1, -64(r1) -; P9LE-NEXT: lbz r4, 52(r1) -; P9LE-NEXT: std r3, 48(r1) +; P9LE-NEXT: rldicl r4, r3, 32, 32 +; P9LE-NEXT: stw r3, 48(r1) ; P9LE-NEXT: stw r3, 59(r1) ; P9LE-NEXT: addi r3, r1, 59 +; P9LE-NEXT: stb r4, 52(r1) +; P9LE-NEXT: lbz r4, 52(r1) ; P9LE-NEXT: stb r4, 63(r1) ; P9LE-NEXT: bl callee ; P9LE-NEXT: nop @@ -528,10 +550,12 @@ define signext i8 @caller_5([5 
x i8]* nocapture readonly byval([5 x i8]) %data) ; P10LE-NEXT: mflr r0 ; P10LE-NEXT: std r0, 16(r1) ; P10LE-NEXT: stdu r1, -64(r1) -; P10LE-NEXT: lbz r4, 52(r1) -; P10LE-NEXT: std r3, 48(r1) +; P10LE-NEXT: rldicl r4, r3, 32, 32 +; P10LE-NEXT: stw r3, 48(r1) ; P10LE-NEXT: stw r3, 59(r1) ; P10LE-NEXT: addi r3, r1, 59 +; P10LE-NEXT: stb r4, 52(r1) +; P10LE-NEXT: lbz r4, 52(r1) ; P10LE-NEXT: stb r4, 63(r1) ; P10LE-NEXT: bl callee@notoc ; P10LE-NEXT: li r3, 0 @@ -545,11 +569,14 @@ define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: std r3, 176(r1) -; P8BE-NEXT: stb r3, 127(r1) +; P8BE-NEXT: rldicl r4, r3, 56, 8 +; P8BE-NEXT: stb r3, 183(r1) +; P8BE-NEXT: stw r4, 179(r1) +; P8BE-NEXT: lbz r4, 183(r1) +; P8BE-NEXT: lwz r3, 179(r1) +; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: stw r3, 123(r1) ; P8BE-NEXT: addi r3, r1, 123 -; P8BE-NEXT: lwz r4, 179(r1) -; P8BE-NEXT: stw r4, 123(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -563,11 +590,14 @@ define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: std r3, 176(r1) -; P9BE-NEXT: stb r3, 127(r1) +; P9BE-NEXT: rldicl r4, r3, 56, 8 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: stw r4, 179(r1) +; P9BE-NEXT: lbz r4, 183(r1) +; P9BE-NEXT: lwz r3, 179(r1) +; P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: stw r3, 123(r1) ; P9BE-NEXT: addi r3, r1, 123 -; P9BE-NEXT: lwz r4, 179(r1) -; P9BE-NEXT: stw r4, 123(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -581,11 +611,14 @@ define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: std r3, 176(r1) -; P10BE-NEXT: stb r3, 127(r1) +; P10BE-NEXT: rldicl r4, r3, 56, 8 +; P10BE-NEXT: stb r3, 183(r1) +; 
P10BE-NEXT: stw r4, 179(r1) +; P10BE-NEXT: lbz r4, 183(r1) +; P10BE-NEXT: lwz r3, 179(r1) +; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: stw r3, 123(r1) ; P10BE-NEXT: addi r3, r1, 123 -; P10BE-NEXT: lwz r4, 179(r1) -; P10BE-NEXT: stw r4, 123(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 @@ -625,8 +658,11 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P8LE-NEXT: mflr r0 ; P8LE-NEXT: std r0, 16(r1) ; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: stw r3, 48(r1) +; P8LE-NEXT: sth r4, 52(r1) +; P8LE-NEXT: lwz r3, 48(r1) ; P8LE-NEXT: lhz r4, 52(r1) -; P8LE-NEXT: std r3, 48(r1) ; P8LE-NEXT: stw r3, 58(r1) ; P8LE-NEXT: addi r3, r1, 58 ; P8LE-NEXT: sth r4, 62(r1) @@ -643,8 +679,11 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P9LE-NEXT: mflr r0 ; P9LE-NEXT: std r0, 16(r1) ; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stw r3, 48(r1) +; P9LE-NEXT: rldicl r3, r3, 32, 32 +; P9LE-NEXT: sth r3, 52(r1) +; P9LE-NEXT: lwz r3, 48(r1) ; P9LE-NEXT: lhz r4, 52(r1) -; P9LE-NEXT: std r3, 48(r1) ; P9LE-NEXT: stw r3, 58(r1) ; P9LE-NEXT: addi r3, r1, 58 ; P9LE-NEXT: sth r4, 62(r1) @@ -661,8 +700,11 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P10LE-NEXT: mflr r0 ; P10LE-NEXT: std r0, 16(r1) ; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stw r3, 48(r1) +; P10LE-NEXT: rldicl r3, r3, 32, 32 +; P10LE-NEXT: sth r3, 52(r1) +; P10LE-NEXT: lwz r3, 48(r1) ; P10LE-NEXT: lhz r4, 52(r1) -; P10LE-NEXT: std r3, 48(r1) ; P10LE-NEXT: stw r3, 58(r1) ; P10LE-NEXT: addi r3, r1, 58 ; P10LE-NEXT: sth r4, 62(r1) @@ -678,11 +720,14 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: std r3, 176(r1) -; P8BE-NEXT: sth r3, 126(r1) +; P8BE-NEXT: rldicl r4, r3, 48, 16 +; P8BE-NEXT: sth r3, 182(r1) +; P8BE-NEXT: stw r4, 178(r1) +; 
P8BE-NEXT: lhz r4, 182(r1) +; P8BE-NEXT: lwz r3, 178(r1) +; P8BE-NEXT: sth r4, 126(r1) +; P8BE-NEXT: stw r3, 122(r1) ; P8BE-NEXT: addi r3, r1, 122 -; P8BE-NEXT: lwz r4, 178(r1) -; P8BE-NEXT: stw r4, 122(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -696,11 +741,14 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: std r3, 176(r1) -; P9BE-NEXT: sth r3, 126(r1) +; P9BE-NEXT: rldicl r4, r3, 48, 16 +; P9BE-NEXT: sth r3, 182(r1) +; P9BE-NEXT: stw r4, 178(r1) +; P9BE-NEXT: lhz r4, 182(r1) +; P9BE-NEXT: lwz r3, 178(r1) +; P9BE-NEXT: sth r4, 126(r1) +; P9BE-NEXT: stw r3, 122(r1) ; P9BE-NEXT: addi r3, r1, 122 -; P9BE-NEXT: lwz r4, 178(r1) -; P9BE-NEXT: stw r4, 122(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -714,11 +762,14 @@ define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: std r3, 176(r1) -; P10BE-NEXT: sth r3, 126(r1) +; P10BE-NEXT: rldicl r4, r3, 48, 16 +; P10BE-NEXT: sth r3, 182(r1) +; P10BE-NEXT: stw r4, 178(r1) +; P10BE-NEXT: lhz r4, 182(r1) +; P10BE-NEXT: lwz r3, 178(r1) +; P10BE-NEXT: sth r4, 126(r1) +; P10BE-NEXT: stw r3, 122(r1) ; P10BE-NEXT: addi r3, r1, 122 -; P10BE-NEXT: lwz r4, 178(r1) -; P10BE-NEXT: stw r4, 122(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 @@ -762,11 +813,15 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P8LE-NEXT: mflr r0 ; P8LE-NEXT: std r0, 16(r1) ; P8LE-NEXT: stdu r1, -64(r1) -; P8LE-NEXT: lhz r4, 52(r1) -; P8LE-NEXT: lbz r5, 54(r1) -; P8LE-NEXT: std r3, 48(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: rldicl r5, r3, 16, 48 +; P8LE-NEXT: stw r3, 48(r1) ; P8LE-NEXT: stw r3, 57(r1) ; P8LE-NEXT: addi r3, r1, 57 +; P8LE-NEXT: sth r4, 52(r1) +; P8LE-NEXT: stb r5, 54(r1) +; 
P8LE-NEXT: lhz r4, 52(r1) +; P8LE-NEXT: lbz r5, 54(r1) ; P8LE-NEXT: sth r4, 61(r1) ; P8LE-NEXT: stb r5, 63(r1) ; P8LE-NEXT: bl callee @@ -782,11 +837,15 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P9LE-NEXT: mflr r0 ; P9LE-NEXT: std r0, 16(r1) ; P9LE-NEXT: stdu r1, -64(r1) -; P9LE-NEXT: lhz r4, 52(r1) -; P9LE-NEXT: lbz r5, 54(r1) -; P9LE-NEXT: std r3, 48(r1) +; P9LE-NEXT: rldicl r4, r3, 32, 32 +; P9LE-NEXT: stw r3, 48(r1) ; P9LE-NEXT: stw r3, 57(r1) +; P9LE-NEXT: sth r4, 52(r1) +; P9LE-NEXT: rldicl r4, r3, 16, 48 ; P9LE-NEXT: addi r3, r1, 57 +; P9LE-NEXT: stb r4, 54(r1) +; P9LE-NEXT: lhz r4, 52(r1) +; P9LE-NEXT: lbz r5, 54(r1) ; P9LE-NEXT: sth r4, 61(r1) ; P9LE-NEXT: stb r5, 63(r1) ; P9LE-NEXT: bl callee @@ -802,11 +861,15 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P10LE-NEXT: mflr r0 ; P10LE-NEXT: std r0, 16(r1) ; P10LE-NEXT: stdu r1, -64(r1) -; P10LE-NEXT: lhz r4, 52(r1) -; P10LE-NEXT: lbz r5, 54(r1) -; P10LE-NEXT: std r3, 48(r1) +; P10LE-NEXT: rldicl r4, r3, 32, 32 +; P10LE-NEXT: stw r3, 48(r1) ; P10LE-NEXT: stw r3, 57(r1) +; P10LE-NEXT: sth r4, 52(r1) +; P10LE-NEXT: rldicl r4, r3, 16, 48 ; P10LE-NEXT: addi r3, r1, 57 +; P10LE-NEXT: stb r4, 54(r1) +; P10LE-NEXT: lhz r4, 52(r1) +; P10LE-NEXT: lbz r5, 54(r1) ; P10LE-NEXT: sth r4, 61(r1) ; P10LE-NEXT: stb r5, 63(r1) ; P10LE-NEXT: bl callee@notoc @@ -821,13 +884,18 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: std r3, 176(r1) -; P8BE-NEXT: lhz r4, 181(r1) -; P8BE-NEXT: lwz r5, 177(r1) -; P8BE-NEXT: stb r3, 127(r1) +; P8BE-NEXT: rldicl r4, r3, 40, 24 +; P8BE-NEXT: rldicl r5, r3, 56, 8 +; P8BE-NEXT: stb r3, 183(r1) +; P8BE-NEXT: stw r4, 177(r1) +; P8BE-NEXT: sth r5, 181(r1) +; P8BE-NEXT: lbz r4, 183(r1) +; P8BE-NEXT: lwz r3, 177(r1) +; P8BE-NEXT: lhz r5, 181(r1) +; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: 
stw r3, 121(r1) ; P8BE-NEXT: addi r3, r1, 121 -; P8BE-NEXT: stw r5, 121(r1) -; P8BE-NEXT: sth r4, 125(r1) +; P8BE-NEXT: sth r5, 125(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -841,13 +909,18 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: std r3, 176(r1) -; P9BE-NEXT: lhz r5, 181(r1) -; P9BE-NEXT: stb r3, 127(r1) +; P9BE-NEXT: rldicl r4, r3, 40, 24 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: lbz r5, 183(r1) +; P9BE-NEXT: stw r4, 177(r1) +; P9BE-NEXT: rldicl r4, r3, 56, 8 +; P9BE-NEXT: lwz r3, 177(r1) +; P9BE-NEXT: sth r4, 181(r1) +; P9BE-NEXT: lhz r4, 181(r1) +; P9BE-NEXT: stb r5, 127(r1) +; P9BE-NEXT: stw r3, 121(r1) ; P9BE-NEXT: addi r3, r1, 121 -; P9BE-NEXT: lwz r4, 177(r1) -; P9BE-NEXT: sth r5, 125(r1) -; P9BE-NEXT: stw r4, 121(r1) +; P9BE-NEXT: sth r4, 125(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -861,13 +934,18 @@ define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: std r3, 176(r1) -; P10BE-NEXT: lhz r5, 181(r1) -; P10BE-NEXT: stb r3, 127(r1) +; P10BE-NEXT: rldicl r4, r3, 40, 24 +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: lbz r5, 183(r1) +; P10BE-NEXT: stw r4, 177(r1) +; P10BE-NEXT: rldicl r4, r3, 56, 8 +; P10BE-NEXT: lwz r3, 177(r1) +; P10BE-NEXT: sth r4, 181(r1) +; P10BE-NEXT: lhz r4, 181(r1) +; P10BE-NEXT: stb r5, 127(r1) +; P10BE-NEXT: stw r3, 121(r1) ; P10BE-NEXT: addi r3, r1, 121 -; P10BE-NEXT: lwz r4, 177(r1) -; P10BE-NEXT: sth r5, 125(r1) -; P10BE-NEXT: stw r4, 121(r1) +; P10BE-NEXT: sth r4, 125(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 diff --git a/llvm/test/CodeGen/PowerPC/structsinregs.ll b/llvm/test/CodeGen/PowerPC/structsinregs.ll index caa7824..9fd84c5 100644 --- a/llvm/test/CodeGen/PowerPC/structsinregs.ll +++ 
b/llvm/test/CodeGen/PowerPC/structsinregs.ll @@ -187,13 +187,23 @@ entry: ret i32 %add13 ; CHECK-LABEL: callee2 -; CHECK-DAG: std 9, 96(1) -; CHECK-DAG: std 8, 88(1) -; CHECK-DAG: std 7, 80(1) -; CHECK-DAG: stw 6, 76(1) -; CHECK-DAG: std 5, 64(1) -; CHECK-DAG: sth 4, 62(1) -; CHECK-DAG: stb 3, 55(1) +; CHECK: stb 9, 103(1) +; CHECK: rldicl 10, 9, 56, 8 +; CHECK: sth 10, 101(1) +; CHECK: rldicl 9, 9, 40, 24 +; CHECK: stw 9, 97(1) +; CHECK: sth 8, 94(1) +; CHECK: rldicl 8, 8, 48, 16 +; CHECK: stw 8, 90(1) +; CHECK: stb 7, 87(1) +; CHECK: rldicl 7, 7, 56, 8 +; CHECK: stw 7, 83(1) +; CHECK: stb 5, 71(1) +; CHECK: rldicl 5, 5, 56, 8 +; CHECK: sth 5, 69(1) +; CHECK: stw 6, 76(1) +; CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) ; CHECK-DAG: lha {{[0-9]+}}, 62(1) ; CHECK-DAG: lha {{[0-9]+}}, 69(1) ; CHECK-DAG: lbz {{[0-9]+}}, 55(1) -- 2.7.4