From 6cab60fa068f9127c02246b740aa9b75523a33d2 Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Mon, 3 Sep 2018 09:15:58 +0000
Subject: [PATCH] Extend hasStoreToStackSlot with list of FI accesses.

For instructions that spill/fill to and from multiple frame-indices in a
single instruction, hasStoreToStackSlot and hasLoadFromStackSlot should
return an array of accesses, rather than just the first encounter of such
an access.

This better describes FI accesses for AArch64 (paired) LDP/STP
instructions.

Reviewers: t.p.northover, gberry, thegameg, rengolin, javed.absar, MatzeB

Reviewed By: MatzeB

Differential Revision: https://reviews.llvm.org/D51537

llvm-svn: 341301
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h        | 21 +++++++++------
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp         | 22 ++++++++++-----
 llvm/lib/CodeGen/LiveDebugValues.cpp               |  4 +--
 llvm/lib/CodeGen/RegAllocGreedy.cpp                | 16 +++++++----
 llvm/lib/CodeGen/TargetInstrInfo.cpp               | 31 +++++++++-------------
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp           | 16 ++++++++---
 llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp       | 20 +++++++-------
 llvm/lib/Target/Hexagon/HexagonInstrInfo.h         | 12 ++++-----
 llvm/lib/Target/Lanai/LanaiInstrInfo.cpp           |  7 +++--
 llvm/lib/Target/X86/X86InstrInfo.cpp               | 14 +++++++---
 ...old-masked-merge-scalar-constmask-innerouter.ll |  8 +++---
 ...asked-merge-scalar-constmask-interleavedbits.ll |  8 +++---
 ...merge-scalar-constmask-interleavedbytehalves.ll |  8 +++---
 ...unfold-masked-merge-scalar-constmask-lowhigh.ll |  8 +++---
 .../unfold-masked-merge-scalar-variablemask.ll     |  8 +++---
 llvm/test/CodeGen/AArch64/vec-libcalls.ll          | 24 ++++++++---------
 16 files changed, 130 insertions(+), 97 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 423f85b..f2faea0 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -79,6 +79,13 @@ public:
     return Opc <= TargetOpcode::GENERIC_OP_END;
   }
 
+  // Simple struct describing access to a FrameIndex.
+  struct FrameAccess {
+    const MachineMemOperand *MMO;
+    int FI;
+    FrameAccess(const MachineMemOperand *MMO, int FI) : MMO(MMO), FI(FI) {}
+  };
+
   /// Given a machine instruction descriptor, returns the register
   /// class constraint for OpNum, or NULL.
   const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
@@ -246,14 +253,13 @@ public:
   }
 
   /// If the specified machine instruction has a load from a stack slot,
-  /// return true along with the FrameIndex of the loaded stack slot and the
-  /// machine mem operand containing the reference.
+  /// return true along with the FrameIndices of the loaded stack slot and the
+  /// machine mem operands containing the reference.
   /// If not, return false. Unlike isLoadFromStackSlot, this returns true for
   /// any instructions that loads from the stack. This is just a hint, as some
   /// cases may be missed.
   virtual bool hasLoadFromStackSlot(const MachineInstr &MI,
-                                    const MachineMemOperand *&MMO,
-                                    int &FrameIndex) const;
+                                    SmallVectorImpl<FrameAccess> &Accesses) const;
 
   /// If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
@@ -284,14 +290,13 @@ public:
   }
 
   /// If the specified machine instruction has a store to a stack slot,
-  /// return true along with the FrameIndex of the loaded stack slot and the
-  /// machine mem operand containing the reference.
+  /// return true along with the FrameIndices of the loaded stack slot and the
+  /// machine mem operands containing the reference.
   /// If not, return false. Unlike isStoreToStackSlot,
   /// this returns true for any instructions that stores to the
   /// stack. This is just a hint, as some cases may be missed.
   virtual bool hasStoreToStackSlot(const MachineInstr &MI,
-                                   const MachineMemOperand *&MMO,
-                                   int &FrameIndex) const;
+                                   SmallVectorImpl<FrameAccess> &Accesses) const;
 
   /// Return true if the specified machine instruction
   /// is a copy of one stack slot to another and has no other effect.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 09a7de6..c8e564e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -750,18 +750,28 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   bool Commented = false;
 
+  auto getSize = [&MFI](
+      const SmallVectorImpl<TargetInstrInfo::FrameAccess> &Accesses) {
+    unsigned Size = 0;
+    for (auto &A : Accesses)
+      if (MFI.isSpillSlotObjectIndex(A.FI))
+        Size += A.MMO->getSize();
+    return Size;
+  };
+
   // We assume a single instruction only has a spill or reload, not
   // both.
   const MachineMemOperand *MMO;
+  SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
   if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
     if (MFI.isSpillSlotObjectIndex(FI)) {
       MMO = *MI.memoperands_begin();
       CommentOS << MMO->getSize() << "-byte Reload";
       Commented = true;
     }
-  } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      CommentOS << MMO->getSize() << "-byte Folded Reload";
+  } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
+    if (auto Size = getSize(Accesses)) {
+      CommentOS << Size << "-byte Folded Reload";
       Commented = true;
     }
   } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
@@ -770,9 +780,9 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
       CommentOS << MMO->getSize() << "-byte Spill";
       Commented = true;
     }
-  } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      CommentOS << MMO->getSize() << "-byte Folded Spill";
+  } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
+    if (auto Size = getSize(Accesses)) {
+      CommentOS << Size << "-byte Folded Spill";
       Commented = true;
     }
   }
diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index 417bd9d..dbc19b0 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -470,7 +470,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
                                          MachineFunction *MF, unsigned &Reg) {
   const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   int FI;
-  const MachineMemOperand *MMO;
+  SmallVector<TargetInstrInfo::FrameAccess, 1> Accesses;
 
   // TODO: Handle multiple stores folded into one.
   if (!MI.hasOneMemOperand())
@@ -478,7 +478,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
 
   // To identify a spill instruction, use the same criteria as in AsmPrinter.
   if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
-         TII->hasStoreToStackSlot(MI, MMO, FI)) &&
+         TII->hasStoreToStackSlot(MI, Accesses)) &&
         FrameInfo.isSpillSlotObjectIndex(FI)))
     return false;
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 3333e1f..d48f37f 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -3120,18 +3120,24 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
     // Handle blocks that were not included in subloops.
     if (Loops->getLoopFor(MBB) == L)
       for (MachineInstr &MI : *MBB) {
-        const MachineMemOperand *MMO;
+        SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
 
         if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
           ++Reloads;
-        else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
-                 MFI.isSpillSlotObjectIndex(FI))
+        else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+                 llvm::any_of(Accesses,
+                              [&MFI](const TargetInstrInfo::FrameAccess &A) {
+                                return MFI.isSpillSlotObjectIndex(A.FI);
+                              }))
           ++FoldedReloads;
        else if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
           ++Spills;
-        else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
-                 MFI.isSpillSlotObjectIndex(FI))
+        else if (TII->hasStoreToStackSlot(MI, Accesses) &&
+                 llvm::any_of(Accesses,
+                              [&MFI](const TargetInstrInfo::FrameAccess &A) {
+                                return MFI.isSpillSlotObjectIndex(A.FI);
+                              }))
           ++FoldedSpills;
       }
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 19670c2..4d9aa83 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -339,42 +339,37 @@ bool TargetInstrInfo::PredicateInstruction(
   return MadeChange;
 }
 
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
-                                           const MachineMemOperand *&MMO,
-                                           int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(
+    const MachineInstr &MI, SmallVectorImpl<FrameAccess> &Accesses) const {
+
+  size_t StartSize = Accesses.size();
   for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                   oe = MI.memoperands_end();
        o != oe; ++o) {
     if ((*o)->isLoad()) {
       if (const FixedStackPseudoSourceValue *Value =
              dyn_cast_or_null<FixedStackPseudoSourceValue>(
-                 (*o)->getPseudoValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
+                 (*o)->getPseudoValue()))
+        Accesses.emplace_back(*o, Value->getFrameIndex());
     }
   }
-  return false;
+  return Accesses.size() != StartSize;
 }
 
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
-                                          const MachineMemOperand *&MMO,
-                                          int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(
+    const MachineInstr &MI, SmallVectorImpl<FrameAccess> &Accesses) const {
+  size_t StartSize = Accesses.size();
   for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                   oe = MI.memoperands_end();
        o != oe; ++o) {
     if ((*o)->isStore()) {
       if (const FixedStackPseudoSourceValue *Value =
             dyn_cast_or_null<FixedStackPseudoSourceValue>(
-                 (*o)->getPseudoValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
+                 (*o)->getPseudoValue()))
+        Accesses.emplace_back(*o, Value->getFrameIndex());
     }
   }
-  return false;
+  return Accesses.size() != StartSize;
 }
 
 bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9a4614c..db7e751 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1172,8 +1172,12 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
-  const MachineMemOperand *Dummy;
-  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+  SmallVector<FrameAccess, 1> Accesses;
+  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+    FrameIndex = Accesses.begin()->FI;
+    return true;
+  }
+  return false;
 }
 
 void ARMBaseInstrInfo::
@@ -1386,8 +1390,12 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
 
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                      int &FrameIndex) const {
-  const MachineMemOperand *Dummy;
-  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+  SmallVector<FrameAccess, 1> Accesses;
+  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+    FrameIndex = Accesses.begin()->FI;
+    return true;
+  }
+  return false;
 }
 
 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 77eb283..20ed6a9 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -335,37 +335,37 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 /// This function checks if the instruction or bundle of instructions
 /// has load from stack slot and returns frameindex and machine memory
 /// operand of that instruction if true.
-bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
-                                            const MachineMemOperand *&MMO,
-                                            int &FrameIndex) const {
+bool HexagonInstrInfo::hasLoadFromStackSlot(
+    const MachineInstr &MI,
+    SmallVectorImpl<FrameAccess> &Accesses) const {
   if (MI.isBundle()) {
     const MachineBasicBlock *MBB = MI.getParent();
     MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
     for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
-      if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+      if (TargetInstrInfo::hasLoadFromStackSlot(*MII, Accesses))
         return true;
     return false;
   }
 
-  return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+  return TargetInstrInfo::hasLoadFromStackSlot(MI, Accesses);
 }
 
 /// This function checks if the instruction or bundle of instructions
 /// has store to stack slot and returns frameindex and machine memory
 /// operand of that instruction if true.
-bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
-                                           const MachineMemOperand *&MMO,
-                                           int &FrameIndex) const {
+bool HexagonInstrInfo::hasStoreToStackSlot(
+    const MachineInstr &MI,
+    SmallVectorImpl<FrameAccess> &Accesses) const {
   if (MI.isBundle()) {
     const MachineBasicBlock *MBB = MI.getParent();
     MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
     for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
-      if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+      if (TargetInstrInfo::hasStoreToStackSlot(*MII, Accesses))
         return true;
     return false;
   }
 
-  return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
+  return TargetInstrInfo::hasStoreToStackSlot(MI, Accesses);
 }
 
 /// This function can analyze one/two way branching only and should (mostly) be
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 817b27e..d2125fc 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,16 +69,16 @@ public:
   /// Check if the instruction or the bundle of instructions has
   /// load from stack slots. Return the frameindex and machine memory operand
   /// if true.
-  bool hasLoadFromStackSlot(const MachineInstr &MI,
-                            const MachineMemOperand *&MMO,
-                            int &FrameIndex) const override;
+  bool hasLoadFromStackSlot(
+      const MachineInstr &MI,
+      SmallVectorImpl<FrameAccess> &Accesses) const override;
 
   /// Check if the instruction or the bundle of instructions has
   /// store to stack slots. Return the frameindex and machine memory operand
   /// if true.
-  bool hasStoreToStackSlot(const MachineInstr &MI,
-                           const MachineMemOperand *&MMO,
-                           int &FrameIndex) const override;
+  bool hasStoreToStackSlot(
+      const MachineInstr &MI,
+      SmallVectorImpl<FrameAccess> &Accesses) const override;
 
   /// Analyze the branching code at the end of MBB, returning
   /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 493d02b..398c84a 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -733,8 +733,11 @@ unsigned LanaiInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasLoadFromStackSlot(MI, Accesses)){
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index f6d8e2c..06a4d1f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -411,8 +411,11 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasLoadFromStackSlot(MI, Accesses)) {
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
@@ -441,8 +444,11 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
     if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasStoreToStackSlot(MI, Accesses)) {
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
index 477fc37..188cb5b 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -206,11 +206,11 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-NEXT: eor w8, w0, w1
 ; CHECK-NEXT: and w20, w8, #0xffff00
 ; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
@@ -225,12 +225,12 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: eor w0, w0, w1
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: and w20, w0, #0xffff00
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
index 1fc6a0a..2eeffe0 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
@@ -212,11 +212,11 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-NEXT: eor w8, w0, w1
 ; CHECK-NEXT: and w20, w8, #0x55555555
 ; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
@@ -231,12 +231,12 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: eor w0, w0, w1
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: and w20, w0, #0x55555555
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
index 9020151..a6ebeb1 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
@@ -208,11 +208,11 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-NEXT: eor w8, w0, w1
 ; CHECK-NEXT: and w20, w8, #0xf0f0f0f
 ; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
@@ -227,12 +227,12 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: eor w0, w0, w1
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: and w20, w0, #0xf0f0f0f
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
index 8e5ff65..fc4f7eb 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
@@ -201,11 +201,11 @@ define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-NEXT: eor w8, w0, w1
 ; CHECK-NEXT: and w20, w8, #0xffff
 ; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
@@ -220,12 +220,12 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: eor w0, w0, w1
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: and w20, w0, #0xffff
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
index 6cc4bf4..41de7e8 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
@@ -558,11 +558,11 @@ define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; CHECK-NEXT: eor w8, w0, w1
 ; CHECK-NEXT: and w20, w8, w3
 ; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
@@ -576,12 +576,12 @@ define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT: eor w0, w0, w1
-; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: mov w19, w1
 ; CHECK-NEXT: and w20, w0, w3
 ; CHECK-NEXT: bl use32
 ; CHECK-NEXT: eor w0, w20, w19
-; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %n0 = xor i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index 619e104..80ec45c 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -141,8 +141,8 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v5f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT: mov v8.16b, v4.16b
 ; CHECK-NEXT: mov v9.16b, v3.16b
@@ -165,8 +165,8 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
 ; CHECK-NEXT: mov v2.16b, v10.16b
 ; CHECK-NEXT: mov v3.16b, v9.16b
 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT: mov v4.16b, v0.16b
 ; CHECK-NEXT: mov v0.16b, v12.16b
 ; CHECK-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload
@@ -178,9 +178,9 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
 define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v6f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: stp d13, d12, [sp, #-64]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT: mov v8.16b, v5.16b
 ; CHECK-NEXT: mov v9.16b, v4.16b
@@ -207,12 +207,12 @@ define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
 ; CHECK-NEXT: mov v3.16b, v10.16b
 ; CHECK-NEXT: mov v4.16b, v9.16b
 ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: mov v5.16b, v0.16b
 ; CHECK-NEXT: mov v0.16b, v13.16b
 ; CHECK-NEXT: mov v1.16b, v12.16b
-; CHECK-NEXT: ldp d13, d12, [sp], #64 // 8-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
 %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
 ret <6 x float> %r
@@ -222,7 +222,7 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
 ; CHECK-LABEL: sin_v3f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
 ; CHECK-NEXT: mov v8.16b, v2.16b
 ; CHECK-NEXT: mov v9.16b, v1.16b
@@ -235,7 +235,7 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
 ; CHECK-NEXT: bl sin
 ; CHECK-NEXT: mov v1.16b, v9.16b
 ; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT: mov v2.16b, v0.16b
 ; CHECK-NEXT: mov v0.16b, v10.16b
 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
-- 
2.7.4
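
Illustrative usage (not part of the patch): a minimal sketch of how a caller can consume the new interface, modeled on the AsmPrinter change above. The FrameAccess struct and the hasStoreToStackSlot overload are the ones introduced by this patch; the helper name foldedSpillBytes and the SmallVector inline size are assumptions made only for this example.

    // Sketch: sum the spill-slot bytes referenced by a folded store. A paired
    // AArch64 STP that writes two frame indices now reports one FrameAccess
    // per slot instead of only the first access.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    static unsigned foldedSpillBytes(const MachineInstr &MI,
                                     const TargetInstrInfo &TII,
                                     const MachineFrameInfo &MFI) {
      SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
      unsigned Bytes = 0;
      if (TII.hasStoreToStackSlot(MI, Accesses))
        for (const TargetInstrInfo::FrameAccess &A : Accesses)
          if (MFI.isSpillSlotObjectIndex(A.FI))
            Bytes += A.MMO->getSize();
      return Bytes;
    }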