From c42f0a6e6476971974cb3f52c1138dbd8f9cca1f Mon Sep 17 00:00:00 2001 From: Kishan Parmar Date: Wed, 21 Jun 2023 10:16:43 +0000 Subject: [PATCH] PowerPC/SPE: Add phony registers for high halves of SPE SuperRegs The intent of this patch is to make upper halves of SPE SuperRegs(s0,..,s31) as artificial regs, similar to how X86 has done it. And emit store/reload instructions for the required halves. PR : https://github.com/llvm/llvm-project/issues/57307 Reviewed By: jhibbits Differential Revision: https://reviews.llvm.org/D152437 --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 33 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 27 +- llvm/test/CodeGen/PowerPC/fma-assoc.ll | 56 -- llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll | 12 - llvm/test/CodeGen/PowerPC/fp-strict.ll | 567 +++++++++------------ .../CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll | 20 +- llvm/test/CodeGen/PowerPC/pr55463.ll | 20 +- llvm/test/CodeGen/PowerPC/spe.ll | 156 +++--- 8 files changed, 386 insertions(+), 505 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 510da1a3..0feb42b 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2324,6 +2324,35 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots( if (CSI.empty()) return true; // Early exit if no callee saved registers are modified! + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (Subtarget.hasSPE()) { + // In case of SPE we only have SuperRegs and CRs + // in our CalleeSaveInfo vector. + + unsigned Idx = 0; + for (auto &CalleeSaveReg : CSI) { + const MCPhysReg &Reg = CalleeSaveReg.getReg(); + const MCPhysReg &Lower = RegInfo->getSubReg(Reg, 1); + const MCPhysReg &Higher = RegInfo->getSubReg(Reg, 2); + + // Check only for SuperRegs. 
+ if (Lower) { + if (MRI.isPhysRegModified(Higher)) { + Idx++; + continue; + } else { + // Replace Reg if only lower-32 bits modified + CSI.erase(CSI.begin() + Idx); + CSI.insert(CSI.begin() + Idx, CalleeSavedInfo(Lower)); + } + } + Idx++; + } + } + // Early exit if cannot spill gprs to volatile vector registers. MachineFrameInfo &MFI = MF.getFrameInfo(); if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) @@ -2332,8 +2361,6 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots( // Build a BitVector of VSRs that can be used for spilling GPRs. BitVector BVAllocatable = TRI->getAllocatableSet(MF); BitVector BVCalleeSaved(TRI->getNumRegs()); - const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) BVCalleeSaved.set(CSRegs[i]); @@ -2341,7 +2368,7 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots( // Set to 0 if the register is not a volatile VSX register, or if it is // used in the function. 
if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || - MF.getRegInfo().isPhysRegUsed(Reg)) + MRI.isPhysRegUsed(Reg)) BVAllocatable.reset(Reg); } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index a8932a2..6151faf 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -15,6 +15,7 @@ def sub_gt : SubRegIndex<1, 1>; def sub_eq : SubRegIndex<1, 2>; def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; +def sub_32_hi_phony : SubRegIndex<32,32>; def sub_64 : SubRegIndex<64>; def sub_vsx0 : SubRegIndex<128>; def sub_vsx1 : SubRegIndex<128, 128>; @@ -43,13 +44,12 @@ class GP8 : PPCReg { let SubRegIndices = [sub_32]; } -// SPE - One of the 32 64-bit general-purpose registers (SPE) -class SPE : PPCReg { - let HWEncoding = SubReg.HWEncoding; - let SubRegs = [SubReg]; - let SubRegIndices = [sub_32]; +class SPE Enc, list subregs = []> : PPCReg { + let HWEncoding{4-0} = Enc; + let SubRegs = subregs; + let SubRegIndices = [sub_32, sub_32_hi_phony]; + let CoveredBySubRegs = 1; } - // SPR - One of the 32-bit special-purpose registers class SPR num, string n> : PPCReg { let HWEncoding{9-0} = num; @@ -129,6 +129,12 @@ foreach Index = 0-31 in { def R#Index : GPR, DwarfRegNum<[-2, Index]>; } +let isArtificial = 1 in { + foreach Index = 0-31 in { + def H#Index : GPR<-1,"">; + } +} + // 64-bit General-purpose registers foreach Index = 0-31 in { def X#Index : GP8("R"#Index), "r"#Index>, @@ -137,10 +143,12 @@ foreach Index = 0-31 in { // SPE registers foreach Index = 0-31 in { - def S#Index : SPE("R"#Index), "r"#Index>, + def S#Index : SPE<"r"#Index, Index, [!cast("R"#Index), !cast("H"#Index)]>, DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>; + } + // Floating-point registers foreach Index = 0-31 in { def F#Index : FPR, @@ -293,6 +301,11 @@ def CARRY: SPR<1, "xer">, DwarfRegNum<[76]> { // that do nothing but change RM will not get deleted. 
def RM: PPCReg<"**ROUNDING MODE**">; +let isAllocatable = 0 in +def GPRC32 : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "H%u", 2, 12), + (sequence "H%u", 30, 13), + H31, H0, H1)>; + /// Register classes // Allocate volatiles first // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll index 94cbb0c..91a89fd 100644 --- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll +++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll @@ -39,8 +39,6 @@ define double @test_FMADD_ASSOC1(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdadd 4, 3, 5 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul double %A, %B ; [#uses=1] @@ -77,8 +75,6 @@ define double @test_FMADD_ASSOC2(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdadd 4, 5, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul double %A, %B ; [#uses=1] @@ -115,8 +111,6 @@ define double @test_FMSUB_ASSOC1(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 3, 5 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul double %A, %B ; [#uses=1] @@ -153,8 +147,6 @@ define double @test_FMSUB_ASSOC2(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 5, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr 
double %D, double %E) { %F = fmul double %A, %B ; [#uses=1] @@ -188,8 +180,6 @@ define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdadd 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -223,8 +213,6 @@ define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdadd 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -259,8 +247,6 @@ define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdadd 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -294,8 +280,6 @@ define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdadd 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -332,8 +316,6 @@ define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdsub 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -368,8 +350,6 @@ 
define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdsub 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -406,8 +386,6 @@ define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -442,8 +420,6 @@ define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul float %A, %B ; [#uses=1] @@ -480,8 +456,6 @@ define double @test_reassoc_FMADD_ASSOC1(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdadd 4, 3, 5 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] @@ -517,8 +491,6 @@ define double @test_reassoc_FMADD_ASSOC2(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdadd 4, 5, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] @@ -556,8 +528,6 @@ define double @test_reassoc_FMSUB_ASSOC1(double %A, 
double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 3, 5 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] @@ -593,8 +563,6 @@ define double @test_reassoc_FMSUB_ASSOC11(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 3, 5 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul contract reassoc double %A, %B ; [#uses=1] @@ -632,8 +600,6 @@ define double @test_reassoc_FMSUB_ASSOC2(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 5, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] @@ -670,8 +636,6 @@ define double @test_fast_FMSUB_ASSOC2(double %A, double %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 3, 4 ; CHECK-SPE-NEXT: efdsub 4, 5, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] @@ -705,8 +669,6 @@ define double @test_reassoc_FMADD_ASSOC_EXT1(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdadd 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -740,8 +702,6 @@ define double 
@test_reassoc_FMADD_ASSOC_EXT2(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdadd 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -776,8 +736,6 @@ define double @test_reassoc_FMADD_ASSOC_EXT3(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdadd 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -811,8 +769,6 @@ define double @test_reassoc_FMADD_ASSOC_EXT4(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdadd 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -847,8 +803,6 @@ define double @test_reassoc_FMSUB_ASSOC_EXT1(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdsub 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -882,8 +836,6 @@ define double @test_reassoc_FMSUB_ASSOC_EXT2(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdsub 4, 3, 4 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -922,8 +874,6 @@ define double 
@test_reassoc_FMSUB_ASSOC_EXT3(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -960,8 +910,6 @@ define double @test_fast_FMSUB_ASSOC_EXT3(float %A, float %B, double %C, ; CHECK-SPE-NEXT: efdadd 3, 5, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr double %D, double %E) { %F = fmul reassoc float %A, %B @@ -999,8 +947,6 @@ define double @test_reassoc_FMSUB_ASSOC_EXT4(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul reassoc float %A, %B ; [#uses=1] @@ -1034,8 +980,6 @@ define double @test_fast_FMSUB_ASSOC_EXT4(float %A, float %B, float %C, ; CHECK-SPE-NEXT: efdcfs 3, 3 ; CHECK-SPE-NEXT: efdsub 4, 4, 3 ; CHECK-SPE-NEXT: evmergehi 3, 4, 4 -; CHECK-SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; CHECK-SPE-NEXT: blr float %D, double %E) { %F = fmul reassoc float %A, %B diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll index 2a7d854..fbf5be16 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll @@ -44,8 +44,6 @@ define i64 @d_to_i64(double %m) #0 { ; SPE-NEXT: .cfi_offset lr, 4 ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # 
kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl __fixdfdi ; SPE-NEXT: lwz r0, 20(r1) ; SPE-NEXT: addi r1, r1, 16 @@ -66,8 +64,6 @@ define i64 @d_to_u64(double %m) #0 { ; SPE-NEXT: .cfi_offset lr, 4 ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl __fixunsdfdi ; SPE-NEXT: lwz r0, 20(r1) ; SPE-NEXT: addi r1, r1, 16 @@ -150,8 +146,6 @@ define double @i32_to_d(i32 signext %m) #0 { ; SPE: # %bb.0: # %entry ; SPE-NEXT: efdcfsi r4, r3 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr entry: %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -170,8 +164,6 @@ define double @i64_to_d(i64 %m) #0 { ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -185,8 +177,6 @@ define double @u32_to_d(i32 zeroext %m) #0 { ; SPE: # %bb.0: # %entry ; SPE-NEXT: efdcfui r4, r3 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr entry: %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -205,8 +195,6 @@ define double @u64_to_d(i64 %m) #0 { ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll 
b/llvm/test/CodeGen/PowerPC/fp-strict.ll index 3865f23..9f852b0 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll @@ -73,8 +73,6 @@ define double @fadd_f64(double %f1, double %f2) #0 { ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdadd r4, r3, r5 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fadd.f64( double %f1, double %f2, @@ -200,8 +198,6 @@ define double @fsub_f64(double %f1, double %f2) #0 { ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdsub r4, r3, r5 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fsub.f64( @@ -328,8 +324,6 @@ define double @fmul_f64(double %f1, double %f2) #0 { ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdmul r4, r3, r5 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fmul.f64( @@ -456,8 +450,6 @@ define double @fdiv_f64(double %f1, double %f2) #0 { ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efddiv r4, r3, r5 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fdiv.f64( @@ -565,8 +557,6 @@ define double @no_fma_fold(double %f1, double %f2, double %f3) #0 { ; SPE-NEXT: efdmul r3, r3, r5 ; SPE-NEXT: efdadd r4, r3, r7 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr %mul = call double @llvm.experimental.constrained.fmul.f64( double %f1, double %f2, @@ -635,18 +625,10 @@ define double 
@fmadd_f64(double %f0, double %f1, double %f2) #0 { ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r6, r6 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -699,42 +681,42 @@ define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-LABEL: fmadd_v4f32: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: stw r0, 100(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: stw r0, 68(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -88 -; SPE-NEXT: .cfi_offset r22, -80 -; SPE-NEXT: .cfi_offset r23, -72 -; SPE-NEXT: .cfi_offset r24, -64 -; SPE-NEXT: .cfi_offset r25, -56 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: lwz r5, 116(r1) -; SPE-NEXT: evstdd r25, 40(r1) # 8-byte 
Folded Spill +; SPE-NEXT: lwz r5, 84(r1) +; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r25, r3 -; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r26, r4 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r29, r8 -; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r9 -; SPE-NEXT: lwz r24, 104(r1) -; SPE-NEXT: lwz r23, 108(r1) -; SPE-NEXT: lwz r22, 112(r1) +; SPE-NEXT: lwz r24, 72(r1) +; SPE-NEXT: lwz r23, 76(r1) +; SPE-NEXT: lwz r22, 80(r1) ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 ; SPE-NEXT: mr r3, r27 @@ -754,18 +736,18 @@ define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-NEXT: mr r4, r29 ; SPE-NEXT: mr r5, r30 ; SPE-NEXT: mr r6, r21 -; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload -; 
SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( @@ -791,36 +773,31 @@ define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub ; SPE-LABEL: fmadd_v2f64: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -64(r1) -; SPE-NEXT: stw r0, 68(r1) -; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: stwu r1, -80(r1) +; SPE-NEXT: stw r0, 84(r1) +; SPE-NEXT: .cfi_def_cfa_offset 80 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 +; SPE-NEXT: .cfi_offset r26, -64 +; SPE-NEXT: .cfi_offset r27, -56 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -8 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill ; SPE-NEXT: evmergelo r27, r7, r8 ; SPE-NEXT: evmergelo r9, r9, r10 ; SPE-NEXT: evmergelo r4, r5, r6 ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r8, 80(r1) +; SPE-NEXT: evldd 
r8, 96(r1) ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r29, 88(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: evldd r29, 104(r1) ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: evldd r28, 72(r1) -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: evldd r28, 88(r1) ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r26, r3, r4 ; SPE-NEXT: evmergehi r3, r27, r27 @@ -829,21 +806,18 @@ define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub ; SPE-NEXT: mr r4, r27 ; SPE-NEXT: mr r6, r28 ; SPE-NEXT: mr r8, r29 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: li r5, 8 ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: evstddx r3, r30, r5 ; SPE-NEXT: evstdd r26, 0(r30) -; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 68(r1) -; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: lwz r0, 84(r1) +; SPE-NEXT: addi r1, r1, 80 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( @@ -911,19 +885,11 @@ define double @fmsub_f64(double %f0, double %f1, double %f2) #0 { ; SPE-NEXT: efdneg r8, r3 ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r6, r6 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed 
$r5 killed $s5 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -980,45 +946,45 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-LABEL: fmsub_v4f32: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: stw r0, 100(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: stw r0, 68(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -88 -; SPE-NEXT: .cfi_offset r22, -80 -; SPE-NEXT: .cfi_offset r23, -72 -; SPE-NEXT: .cfi_offset r24, -64 -; SPE-NEXT: .cfi_offset r25, -56 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r25, r3 -; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r26, r4 -; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: lwz r3, 112(r1) 
-; SPE-NEXT: lwz r4, 104(r1) -; SPE-NEXT: lwz r5, 108(r1) -; SPE-NEXT: lwz r7, 116(r1) -; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: lwz r3, 80(r1) +; SPE-NEXT: lwz r4, 72(r1) +; SPE-NEXT: lwz r5, 76(r1) +; SPE-NEXT: lwz r7, 84(r1) +; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill ; SPE-NEXT: efsneg r22, r3 -; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill ; SPE-NEXT: efsneg r23, r5 -; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill ; SPE-NEXT: efsneg r24, r4 ; SPE-NEXT: efsneg r5, r7 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r29, r8 -; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r9 ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 @@ -1039,18 +1005,18 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-NEXT: mr r4, r29 ; SPE-NEXT: mr r5, r30 ; SPE-NEXT: mr r6, r21 -; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload +; 
SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 @@ -1077,19 +1043,19 @@ define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub ; SPE-LABEL: fmsub_v2f64: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -64(r1) -; SPE-NEXT: stw r0, 68(r1) -; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: stwu r1, -80(r1) +; SPE-NEXT: stw r0, 84(r1) +; SPE-NEXT: .cfi_def_cfa_offset 80 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -64 +; SPE-NEXT: .cfi_offset r27, -56 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r3, 80(r1) -; SPE-NEXT: evldd r11, 88(r1) +; SPE-NEXT: evldd r3, 96(r1) +; SPE-NEXT: evldd r11, 104(r1) ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: efdneg r27, r11 @@ -1103,12 +1069,7 @@ define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: evmergehi r7, r8, r8 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r28, 72(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 
killed $r7 killed $s7 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: evldd r28, 88(r1) ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r26, r3, r4 ; SPE-NEXT: evmergehi r3, r29, r29 @@ -1117,21 +1078,18 @@ define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub ; SPE-NEXT: mr r4, r29 ; SPE-NEXT: mr r6, r28 ; SPE-NEXT: mr r8, r27 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: li r5, 8 ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: evstddx r3, r30, r5 ; SPE-NEXT: evstdd r26, 0(r30) -; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 68(r1) -; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: lwz r0, 84(r1) +; SPE-NEXT: addi r1, r1, 80 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 @@ -1200,19 +1158,11 @@ define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 { ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r6, r6 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdneg r4, r3 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -1269,42 +1219,42 @@ define <4 x float> 
@fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-LABEL: fnmadd_v4f32: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: stw r0, 100(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: stw r0, 68(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -88 -; SPE-NEXT: .cfi_offset r22, -80 -; SPE-NEXT: .cfi_offset r23, -72 -; SPE-NEXT: .cfi_offset r24, -64 -; SPE-NEXT: .cfi_offset r25, -56 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: lwz r5, 116(r1) -; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: lwz r5, 84(r1) +; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r25, r3 -; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r26, r4 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill +; SPE-NEXT: stw 
r28, 48(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r29, r8 -; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r9 -; SPE-NEXT: lwz r24, 104(r1) -; SPE-NEXT: lwz r23, 108(r1) -; SPE-NEXT: lwz r22, 112(r1) +; SPE-NEXT: lwz r24, 72(r1) +; SPE-NEXT: lwz r23, 76(r1) +; SPE-NEXT: lwz r22, 80(r1) ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 ; SPE-NEXT: mr r3, r27 @@ -1325,18 +1275,18 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-NEXT: efsneg r5, r30 ; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: efsneg r6, r21 -; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %fma = call <4 x float> 
@llvm.experimental.constrained.fma.v4f32( @@ -1363,36 +1313,31 @@ define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-LABEL: fnmadd_v2f64: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -64(r1) -; SPE-NEXT: stw r0, 68(r1) -; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: stwu r1, -80(r1) +; SPE-NEXT: stw r0, 84(r1) +; SPE-NEXT: .cfi_def_cfa_offset 80 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 +; SPE-NEXT: .cfi_offset r26, -64 +; SPE-NEXT: .cfi_offset r27, -56 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -8 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill ; SPE-NEXT: evmergelo r27, r7, r8 ; SPE-NEXT: evmergelo r9, r9, r10 ; SPE-NEXT: evmergelo r4, r5, r6 ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r8, 80(r1) +; SPE-NEXT: evldd r8, 96(r1) ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r29, 88(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: evldd r29, 104(r1) ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: evldd r28, 72(r1) -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: evldd r28, 88(r1) ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r26, r3, r4 ; SPE-NEXT: evmergehi r3, r27, r27 @@ -1401,9 +1346,6 @@ define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-NEXT: mr r4, r27 ; SPE-NEXT: mr 
r6, r28 ; SPE-NEXT: mr r8, r29 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: li r5, 8 @@ -1411,13 +1353,13 @@ define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-NEXT: evstddx r3, r30, r5 ; SPE-NEXT: efdneg r3, r26 ; SPE-NEXT: evstdd r3, 0(r30) -; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 68(r1) -; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: lwz r0, 84(r1) +; SPE-NEXT: addi r1, r1, 80 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( @@ -1488,20 +1430,12 @@ define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 { ; SPE-NEXT: efdneg r8, r3 ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r6, r6 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdneg r4, r3 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -1560,45 +1494,45 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-LABEL: fnmsub_v4f32: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu 
r1, -96(r1) -; SPE-NEXT: stw r0, 100(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: stw r0, 68(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -88 -; SPE-NEXT: .cfi_offset r22, -80 -; SPE-NEXT: .cfi_offset r23, -72 -; SPE-NEXT: .cfi_offset r24, -64 -; SPE-NEXT: .cfi_offset r25, -56 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r25, r3 -; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r26, r4 -; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: lwz r3, 112(r1) -; SPE-NEXT: lwz r4, 104(r1) -; SPE-NEXT: lwz r5, 108(r1) -; SPE-NEXT: lwz r7, 116(r1) -; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: lwz r3, 80(r1) +; SPE-NEXT: lwz r4, 72(r1) +; SPE-NEXT: lwz r5, 76(r1) +; SPE-NEXT: lwz r7, 84(r1) +; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill ; SPE-NEXT: efsneg r22, r3 -; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill ; SPE-NEXT: efsneg r23, r5 -; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill ; 
SPE-NEXT: efsneg r24, r4 ; SPE-NEXT: efsneg r5, r7 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r29, r8 -; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r9 ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 @@ -1620,18 +1554,18 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> ; SPE-NEXT: efsneg r5, r30 ; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: efsneg r6, r21 -; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 @@ -1659,19 +1593,19 @@ define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, 
<2 x double> %vf1, <2 x dou ; SPE-LABEL: fnmsub_v2f64: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -64(r1) -; SPE-NEXT: stw r0, 68(r1) -; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: stwu r1, -80(r1) +; SPE-NEXT: stw r0, 84(r1) +; SPE-NEXT: .cfi_def_cfa_offset 80 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -48 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -64 +; SPE-NEXT: .cfi_offset r27, -56 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r3, 80(r1) -; SPE-NEXT: evldd r11, 88(r1) +; SPE-NEXT: evldd r3, 96(r1) +; SPE-NEXT: evldd r11, 104(r1) ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: efdneg r27, r11 @@ -1685,12 +1619,7 @@ define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: evmergehi r7, r8, r8 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r28, 72(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 -; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: evldd r28, 88(r1) ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r26, r3, r4 ; SPE-NEXT: evmergehi r3, r29, r29 @@ -1699,9 +1628,6 @@ define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-NEXT: mr r4, r29 ; SPE-NEXT: mr r6, r28 ; SPE-NEXT: mr r8, r27 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 -; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 -; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: bl fma ; SPE-NEXT: evmergelo r3, 
r3, r4 ; SPE-NEXT: li r5, 8 @@ -1709,13 +1635,13 @@ define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou ; SPE-NEXT: evstddx r3, r30, r5 ; SPE-NEXT: efdneg r3, r26 ; SPE-NEXT: evstdd r3, 0(r30) -; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 68(r1) -; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: lwz r0, 84(r1) +; SPE-NEXT: addi r1, r1, 80 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 @@ -1777,14 +1703,10 @@ define double @fsqrt_f64(double %f1) #0 { ; SPE-NEXT: .cfi_offset lr, 4 ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl sqrt ; SPE-NEXT: evmergelo r4, r3, r4 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: lwz r0, 20(r1) -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr @@ -1824,21 +1746,21 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 { ; SPE-LABEL: fsqrt_v4f32: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -48(r1) -; SPE-NEXT: stw r0, 52(r1) -; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: stwu r1, -32(r1) +; SPE-NEXT: stw r0, 36(r1) +; SPE-NEXT: .cfi_def_cfa_offset 32 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r27, -40 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 -; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: stw r28, 16(r1) # 4-byte Folded Spill ; 
SPE-NEXT: mr r28, r3 ; SPE-NEXT: mr r3, r6 -; SPE-NEXT: evstdd r27, 8(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r27, 12(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 20(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r29, r4 -; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; SPE-NEXT: mr r30, r5 ; SPE-NEXT: bl sqrtf ; SPE-NEXT: mr r27, r3 @@ -1853,12 +1775,12 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 { ; SPE-NEXT: mr r4, r29 ; SPE-NEXT: mr r5, r30 ; SPE-NEXT: mr r6, r27 -; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: evldd r27, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 52(r1) -; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 20(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 16(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 12(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 36(r1) +; SPE-NEXT: addi r1, r1, 32 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32( @@ -1883,37 +1805,34 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 { ; SPE-LABEL: fsqrt_v2f64: ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 -; SPE-NEXT: stwu r1, -48(r1) -; SPE-NEXT: stw r0, 52(r1) -; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: stw r0, 68(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r28, -32 -; SPE-NEXT: .cfi_offset r29, -24 -; SPE-NEXT: .cfi_offset r30, -16 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -8 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill -; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte 
Folded Spill ; SPE-NEXT: evmergelo r29, r7, r8 ; SPE-NEXT: evmergelo r4, r5, r6 ; SPE-NEXT: mr r30, r3 ; SPE-NEXT: evmergehi r3, r4, r4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl sqrt ; SPE-NEXT: evmergelo r28, r3, r4 ; SPE-NEXT: evmergehi r3, r29, r29 ; SPE-NEXT: mr r4, r29 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl sqrt ; SPE-NEXT: li r5, 8 ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: evstddx r3, r30, r5 ; SPE-NEXT: evstdd r28, 0(r30) -; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r0, 52(r1) -; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll index 9414750..5c3ba3c 100644 --- a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll +++ b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll @@ -1,11 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ ; RUN: -mattr=+spe | FileCheck %s define i32 @test_f32(float %x) { ; CHECK-LABEL: test_f32: -; CHECK: #APP +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset r31, -4 +; CHECK-NEXT: stw 3, 8(1) +; CHECK-NEXT: stw 31, 12(1) # 4-byte Folded Spill +; CHECK-NEXT: lwz 3, 8(1) +; CHECK-NEXT: #APP ; CHECK-NEXT: efsctsi 31, 3 ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: mr 3, 31 +; CHECK-NEXT: lwz 31, 12(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr entry: %0 = 
call i32 asm sideeffect "efsctsi $0, $1", "={f31},f"(float %x) ret i32 %0 @@ -13,9 +25,13 @@ entry: define i32 @test_f64(double %x) { ; CHECK-LABEL: test_f64: -; CHECK: #APP +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: evmergelo 3, 3, 4 +; CHECK-NEXT: #APP ; CHECK-NEXT: efdctsi 0, 3 ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: mr 3, 0 +; CHECK-NEXT: blr entry: %0 = call i32 asm sideeffect "efdctsi $0, $1", "={f0},d"(double %x) ret i32 %0 diff --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll index 8045bc6..11ee81e 100644 --- a/llvm/test/CodeGen/PowerPC/pr55463.ll +++ b/llvm/test/CodeGen/PowerPC/pr55463.ll @@ -46,26 +46,22 @@ define void @wombat() #0 { ; CHECK-NEXT: lis 4, .LCPI1_0@ha ; CHECK-NEXT: lis 6, .LCPI1_1@ha ; CHECK-NEXT: stw 0, 52(1) -; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill ; CHECK-NEXT: evlddx 30, 4, 3 ; CHECK-NEXT: # implicit-def: $r3 ; CHECK-NEXT: evlddx 29, 6, 5 -; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; CHECK-NEXT: # implicit-def: $r28 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_1: # %bb1 ; CHECK-NEXT: # ; CHECK-NEXT: efdcfsi 8, 3 -; CHECK-NEXT: mr 4, 30 -; CHECK-NEXT: mr 6, 29 ; CHECK-NEXT: evmergehi 3, 30, 30 ; CHECK-NEXT: evmergehi 5, 29, 29 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5 +; CHECK-NEXT: mr 4, 30 +; CHECK-NEXT: mr 6, 29 ; CHECK-NEXT: evmergehi 7, 8, 8 -; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8 -; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7 ; CHECK-NEXT: bl fma ; CHECK-NEXT: evmergelo 3, 3, 4 ; CHECK-NEXT: addi 28, 28, -1 @@ -74,9 +70,9 @@ define void @wombat() #0 { ; CHECK-NEXT: bc 12, 1, .LBB1_1 ; CHECK-NEXT: # %bb.2: # %bb8 ; CHECK-NEXT: bl wibble -; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload 
-; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 52(1) ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index 14ea241..4bfc413 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -612,8 +612,6 @@ define double @test_ddiv(double %a, double %b) #0 { ; SPE-NEXT: evmergelo 3, 3, 4 ; SPE-NEXT: efddiv 4, 3, 5 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_ddiv: @@ -639,8 +637,6 @@ define double @test_dmul(double %a, double %b) #0 { ; SPE-NEXT: evmergelo 3, 3, 4 ; SPE-NEXT: efdmul 4, 3, 5 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dmul: @@ -665,8 +661,6 @@ define double @test_dadd(double %a, double %b) #0 { ; SPE-NEXT: evmergelo 3, 3, 4 ; SPE-NEXT: efdadd 4, 3, 5 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dadd: @@ -691,8 +685,6 @@ define double @test_dsub(double %a, double %b) #0 { ; SPE-NEXT: evmergelo 3, 3, 4 ; SPE-NEXT: efdsub 4, 3, 5 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dsub: @@ -716,8 +708,6 @@ define double @test_dneg(double %a) #0 { ; SPE-NEXT: evmergelo 3, 3, 4 ; SPE-NEXT: efdneg 4, 3 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 
; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dneg: @@ -734,8 +724,6 @@ define double @test_stod(float %a) #0 { ; SPE: # %bb.0: # %entry ; SPE-NEXT: efdcfs 4, 3 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_stod: @@ -1125,17 +1113,17 @@ define i32 @test_dcmpueq(double %a, double %b) #0 { ; EFPU2-LABEL: test_dcmpueq: ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 -; EFPU2-NEXT: stwu 1, -80(1) +; EFPU2-NEXT: stwu 1, -48(1) ; EFPU2-NEXT: mfcr 12 -; EFPU2-NEXT: stw 0, 84(1) -; EFPU2-NEXT: stw 12, 76(1) -; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 0, 52(1) +; EFPU2-NEXT: stw 12, 24(1) +; EFPU2-NEXT: stw 27, 28(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 27, 3 -; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 28, 4 -; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 29, 36(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 29, 5 -; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 30, 40(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 30, 6 ; EFPU2-NEXT: bl __eqdf2 ; EFPU2-NEXT: cmpwi 2, 3, 0 @@ -1156,14 +1144,14 @@ define i32 @test_dcmpueq(double %a, double %b) #0 { ; EFPU2-NEXT: .LBB42_4: # %ret ; EFPU2-NEXT: stw 3, 20(1) ; EFPU2-NEXT: lwz 3, 20(1) -; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 12, 76(1) -; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 32(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 12, 24(1) +; EFPU2-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; EFPU2-NEXT: mtcrf 32, 12 # cr2 -; EFPU2-NEXT: lwz 0, 84(1) -; EFPU2-NEXT: addi 1, 1, 80 +; EFPU2-NEXT: lwz 0, 
52(1) +; EFPU2-NEXT: addi 1, 1, 48 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1201,17 +1189,17 @@ define i1 @test_dcmpne(double %a, double %b) #0 { ; EFPU2-LABEL: test_dcmpne: ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 -; EFPU2-NEXT: stwu 1, -80(1) +; EFPU2-NEXT: stwu 1, -48(1) ; EFPU2-NEXT: mfcr 12 -; EFPU2-NEXT: stw 0, 84(1) -; EFPU2-NEXT: stw 12, 76(1) -; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 0, 52(1) +; EFPU2-NEXT: stw 12, 24(1) +; EFPU2-NEXT: stw 27, 28(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 27, 3 -; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 28, 4 -; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 29, 36(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 29, 5 -; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 30, 40(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 30, 6 ; EFPU2-NEXT: bl __unorddf2 ; EFPU2-NEXT: cmpwi 2, 3, 0 @@ -1220,13 +1208,13 @@ define i1 @test_dcmpne(double %a, double %b) #0 { ; EFPU2-NEXT: mr 5, 29 ; EFPU2-NEXT: mr 6, 30 ; EFPU2-NEXT: bl __eqdf2 -; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; EFPU2-NEXT: cmpwi 3, 0 -; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; EFPU2-NEXT: li 4, 1 -; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; EFPU2-NEXT: crorc 20, 2, 10 -; EFPU2-NEXT: lwz 12, 76(1) +; EFPU2-NEXT: lwz 12, 24(1) ; EFPU2-NEXT: bc 12, 20, .LBB43_2 ; EFPU2-NEXT: # %bb.1: # %entry ; EFPU2-NEXT: ori 3, 4, 0 @@ -1234,10 +1222,10 @@ define i1 @test_dcmpne(double %a, double %b) #0 { ; EFPU2-NEXT: .LBB43_2: # %entry ; EFPU2-NEXT: li 3, 0 ; EFPU2-NEXT: .LBB43_3: # %entry -; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; EFPU2-NEXT: mtcrf 32, 12 # cr2 -; 
EFPU2-NEXT: lwz 0, 84(1) -; EFPU2-NEXT: addi 1, 1, 80 +; EFPU2-NEXT: lwz 0, 52(1) +; EFPU2-NEXT: addi 1, 1, 48 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1514,8 +1502,6 @@ define double @test_dselect(double %a, double %b, i1 %c) #0 { ; SPE-NEXT: evor 4, 5, 5 ; SPE-NEXT: .LBB49_2: # %entry ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dselect: @@ -1580,8 +1566,6 @@ define double @test_dfromui(i32 %a) #0 { ; SPE: # %bb.0: # %entry ; SPE-NEXT: efdcfui 4, 3 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dfromui: @@ -1604,8 +1588,6 @@ define double @test_dfromsi(i32 %a) #0 { ; SPE: # %bb.0: # %entry ; SPE-NEXT: efdcfsi 4, 3 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: blr ; ; EFPU2-LABEL: test_dfromsi: @@ -1687,9 +1669,7 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5 ; SPE-NEXT: evldd 4, 24(1) # 8-byte Folded Reload ; SPE-NEXT: li 5, 256 ; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1715,15 +1695,15 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5 ; EFPU2-LABEL: test_spill: ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 -; EFPU2-NEXT: stwu 1, -144(1) +; EFPU2-NEXT: stwu 1, -128(1) ; EFPU2-NEXT: mr 5, 3 ; EFPU2-NEXT: mr 6, 4 -; EFPU2-NEXT: stw 0, 148(1) -; EFPU2-NEXT: evstdd 27, 104(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 28, 112(1) # 8-byte Folded Spill 
-; EFPU2-NEXT: evstdd 29, 120(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 30, 128(1) # 8-byte Folded Spill -; EFPU2-NEXT: lwz 28, 152(1) +; EFPU2-NEXT: stw 0, 132(1) +; EFPU2-NEXT: stw 27, 108(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 112(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 116(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 120(1) # 4-byte Folded Spill +; EFPU2-NEXT: lwz 28, 136(1) ; EFPU2-NEXT: bl __adddf3 ; EFPU2-NEXT: lis 5, 16393 ; EFPU2-NEXT: lis 6, -4069 @@ -1734,7 +1714,7 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5 ; EFPU2-NEXT: bl __adddf3 ; EFPU2-NEXT: mr 30, 3 ; EFPU2-NEXT: mr 29, 4 -; EFPU2-NEXT: addi 3, 1, 52 +; EFPU2-NEXT: addi 3, 1, 56 ; EFPU2-NEXT: li 4, 0 ; EFPU2-NEXT: li 5, 24 ; EFPU2-NEXT: li 6, 1 @@ -1742,19 +1722,19 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5 ; EFPU2-NEXT: bl test_memset ; EFPU2-NEXT: stw 27, 0(28) ; EFPU2-NEXT: bl test_func2 -; EFPU2-NEXT: addi 3, 1, 8 +; EFPU2-NEXT: addi 3, 1, 12 ; EFPU2-NEXT: li 4, 0 ; EFPU2-NEXT: li 5, 20 ; EFPU2-NEXT: li 6, 1 ; EFPU2-NEXT: bl test_memset ; EFPU2-NEXT: mr 3, 30 ; EFPU2-NEXT: mr 4, 29 -; EFPU2-NEXT: evldd 30, 128(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 29, 120(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 28, 112(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 27, 104(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 0, 148(1) -; EFPU2-NEXT: addi 1, 1, 144 +; EFPU2-NEXT: lwz 30, 120(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 116(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 112(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 108(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 132(1) +; EFPU2-NEXT: addi 1, 1, 128 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1781,8 +1761,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: stwu 1, -32(1) ; CHECK-NEXT: cmpwi 3, 0 ; CHECK-NEXT: stw 0, 36(1) -; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill -; 
CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; CHECK-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 30, 24(1) # 4-byte Folded Spill ; CHECK-NEXT: ble 0, .LBB56_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: mr 30, 3 @@ -1803,8 +1783,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB56_4: # %for.cond.cleanup ; CHECK-NEXT: mr 3, 5 -; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 20(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 36(1) ; CHECK-NEXT: addi 1, 1, 32 ; CHECK-NEXT: mtlr 0 @@ -1844,27 +1824,25 @@ define void @d(ptr %e, ptr %f) #0 { ; SPE-NEXT: stw 0, 52(1) ; SPE-NEXT: lwz 4, 0(4) ; SPE-NEXT: lwz 3, 0(3) -; SPE-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill ; SPE-NEXT: efdcfs 29, 4 -; SPE-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; SPE-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; SPE-NEXT: mr 4, 29 -; SPE-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill ; SPE-NEXT: efdcfs 30, 3 ; SPE-NEXT: evmergehi 3, 29, 29 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl foo ; SPE-NEXT: mr 28, 3 ; SPE-NEXT: evmergehi 3, 30, 30 ; SPE-NEXT: mr 4, 30 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: bl foo ; SPE-NEXT: efdcfsi 3, 28 -; SPE-NEXT: evldd 30, 32(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 30, 16(1) # 8-byte Folded Reload ; SPE-NEXT: efdmul 3, 29, 3 ; SPE-NEXT: efscfd 3, 3 -; SPE-NEXT: evldd 29, 24(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; SPE-NEXT: stw 3, 0(3) -; SPE-NEXT: evldd 28, 16(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; SPE-NEXT: lwz 0, 52(1) ; SPE-NEXT: addi 1, 1, 48 ; SPE-NEXT: mtlr 0 @@ -1873,14 +1851,14 @@ define void 
@d(ptr %e, ptr %f) #0 { ; EFPU2-LABEL: d: ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 -; EFPU2-NEXT: stwu 1, -64(1) -; EFPU2-NEXT: stw 0, 68(1) +; EFPU2-NEXT: stwu 1, -32(1) +; EFPU2-NEXT: stw 0, 36(1) ; EFPU2-NEXT: lwz 3, 0(3) -; EFPU2-NEXT: evstdd 26, 16(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 29, 40(1) # 8-byte Folded Spill -; EFPU2-NEXT: evstdd 30, 48(1) # 8-byte Folded Spill +; EFPU2-NEXT: stw 26, 8(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 27, 12(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 16(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 24(1) # 4-byte Folded Spill ; EFPU2-NEXT: mr 30, 4 ; EFPU2-NEXT: bl __extendsfdf2 ; EFPU2-NEXT: mr 28, 3 @@ -1903,13 +1881,13 @@ define void @d(ptr %e, ptr %f) #0 { ; EFPU2-NEXT: bl __muldf3 ; EFPU2-NEXT: bl __truncdfsf2 ; EFPU2-NEXT: stw 3, 0(3) -; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload -; EFPU2-NEXT: evldd 26, 16(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 0, 68(1) -; EFPU2-NEXT: addi 1, 1, 64 +; EFPU2-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 20(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 16(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 12(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 26, 8(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 36(1) +; EFPU2-NEXT: addi 1, 1, 32 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: -- 2.7.4