/// that it is alive across blocks.
BitVector MayLiveAcrossBlocks;
- /// State of a register unit.
- enum RegUnitState {
+ /// State of a physical register.
+ enum RegState {
+ /// A disabled register is not available for allocation, but an alias may
+ /// be in use. A register can only be moved out of the disabled state if
+ /// all aliases are disabled.
+ regDisabled,
+
/// A free register is not currently in use and can be allocated
/// immediately without checking aliases.
regFree,
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// Maps each physical register to a RegUnitState enum or virtual register.
- std::vector<unsigned> RegUnitStates;
+ /// Maps each physical register to a RegState enum or a virtual register.
+ std::vector<unsigned> PhysRegState;
SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
-#ifndef NDEBUG
- bool verifyRegStateMapping(const LiveReg &LR) const;
-#endif
-
void killVirtReg(LiveReg &LR);
void killVirtReg(Register VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
void usePhysReg(MachineOperand &MO);
void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
- unsigned NewState);
+ RegState NewState);
unsigned calcSpillCost(MCPhysReg PhysReg) const;
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
bool mayLiveOut(Register VirtReg);
bool mayLiveIn(Register VirtReg);
- void dumpState() const;
+ void dumpState();
};
} // end anonymous namespace
false)
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
- RegUnitStates[*UI] = NewState;
+ PhysRegState[PhysReg] = NewState;
}
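Under this scheme each PhysRegState entry is either one of the three enum values or a virtual register number; LLVM virtual register numbers have the high bit set, so they can never collide with the small enum constants, which is why the switches below treat the default case as "assigned to a virtual register". A minimal sketch of that classification, using a hypothetical helper that is not part of this code:

// Hypothetical RegAllocFast member, for illustration only: under the encoding
// above, anything other than the three enum constants is a virtual register.
bool RegAllocFast::holdsVirtReg(unsigned Entry) const {
  return Entry != regDisabled && Entry != regFree && Entry != regReserved;
}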
/// This allocates space for the specified virtual register to be held on the
}
}
-#ifndef NDEBUG
-bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
- for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != LR.VirtReg)
- return false;
- }
-
- return true;
-}
-#endif
-
/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(LiveReg &LR) {
- assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
addKillFlag(LR);
- MCPhysReg PhysReg = LR.PhysReg;
- setPhysRegState(PhysReg, regFree);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
+ "Broken RegState mapping");
+ setPhysRegState(LR.PhysReg, regFree);
LR.PhysReg = 0;
}
/// Do the actual work of spilling.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
- assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
-
- MCPhysReg PhysReg = LR.PhysReg;
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- spill(MI, LR.VirtReg, PhysReg, SpillKill);
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ LLVM_FALLTHROUGH;
+ case regFree:
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (RegUnitStates[*UI]) {
+ // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
case regReserved:
- RegUnitStates[*UI] = regFree;
+ // Either PhysReg is a subregister of Alias and we mark the
+ // whole register as free, or PhysReg is the superregister of
+ // Alias and we mark all the aliases as disabled before freeing
+ // PhysReg.
+ // In the latter case, since PhysReg was disabled, its value is defined
+ // only by physical sub-registers; this is checked by the assert in the
+ // default case of this loop.
+ // Note: The value of the superregister may only be partially defined,
+ // which is why regDisabled is a valid state for aliases.
+ assert((TRI->isSuperRegister(PhysReg, Alias) ||
+ TRI->isSuperRegister(Alias, PhysReg)) &&
+ "Instruction is not using a subregister of a reserved register");
LLVM_FALLTHROUGH;
case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ setPhysRegState(Alias, regFree);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ setPhysRegState(Alias, regDisabled);
break;
default:
- llvm_unreachable("Unexpected reg unit state");
+ llvm_unreachable("Instruction uses an alias of an allocated register");
}
}
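A worked trace of the loop above, assuming an x86-like alias structure purely for illustration: if an instruction reads $al while $al is regDisabled and $ax is regReserved, the loop reaches the super-register $ax, the assert holds because $ax is a super-register of $al, and the regFree branch then frees $ax and records a kill of it on the instruction:

// Illustrative trace (hypothetical register names, not from this code):
//   before: PhysRegState[AL] == regDisabled, PhysRegState[AX] == regReserved
//   Alias == AX hits regReserved and falls through to regFree, where
//   isSuperRegister(AL, AX) is true, so:
//     setPhysRegState(AX, regFree);
//     MO.getParent()->addRegisterKilled(AX, TRI, true);
//   after: AX stays in the working set as a free register, killed here.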
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
- MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ MCPhysReg PhysReg, RegState NewState) {
+ markRegUsedInInstr(PhysReg);
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ LLVM_FALLTHROUGH;
+ case regFree:
+ case regReserved:
+ setPhysRegState(PhysReg, NewState);
+ return;
+ }
+
+ // This is a disabled register; disable all aliases.
+ setPhysRegState(PhysReg, NewState);
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (Register VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
default:
spillVirtReg(MI, VirtReg);
- break;
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
+ setPhysRegState(Alias, regDisabled);
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
break;
}
}
-
- markRegUsedInInstr(PhysReg);
- setPhysRegState(PhysReg, NewState);
}
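The disabled path here mirrors the one in usePhysReg: when the register being defined is regDisabled, every alias that currently holds a virtual register is spilled and then disabled, and the walk returns early once it has disabled a super-register of PhysReg. For instance (again with purely illustrative x86-like registers), definePhysReg(MI, EAX, regReserved) while EAX is regDisabled and AX is assigned to a virtual register spills that virtual register, marks AX regDisabled, and leaves EAX as regReserved.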
/// Return the cost of spilling to clear out PhysReg and aliases so it is free
<< " is already used in instr.\n");
return spillImpossible;
}
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
+ }
+ }
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ // This is a disabled register; add up the cost of its aliases.
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ MCPhysReg Alias = *AI;
+ switch (Register VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
case regFree:
+ ++Cost;
break;
case regReserved:
- LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
"Missing VirtReg entry");
- return LRI->Dirty ? spillDirty : spillClean;
+ Cost += LRI->Dirty ? spillDirty : spillClean;
+ break;
}
}
}
- return 0;
+ return Cost;
}
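A worked cost example for the disabled path above (illustrative x86-like registers again): calcSpillCost(RAX) with RAX regDisabled, AL assigned to a dirty virtual register, and AH assigned to a clean one sums over the aliases and returns spillDirty + spillClean; a free alias only adds a nominal cost of 1, and any regReserved alias makes the whole query return spillImpossible.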
/// This method updates local state so that we know that PhysReg is the
if (!Reg || !Reg.isPhysical())
continue;
markRegUsedInInstr(Reg);
-
- for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
- if (!ThroughRegs.count(RegUnitStates[*UI]))
- continue;
-
- // Need to spill any aliasing registers.
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
- for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
- definePhysReg(MI, *SI, regFree);
- }
- }
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
}
}
}
#ifndef NDEBUG
-
-void RegAllocFast::dumpState() const {
- for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
- ++Unit) {
- switch (unsigned VirtReg = RegUnitStates[Unit]) {
+void RegAllocFast::dumpState() {
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << printReg(Reg, TRI);
+ switch(PhysRegState[Reg]) {
case regFree:
break;
case regReserved:
- dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
+ dbgs() << "*";
break;
default: {
- dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
- if (I->Dirty)
- dbgs() << "[D]";
- assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
+ dbgs() << '=' << printReg(PhysRegState[Reg]);
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
+ dbgs() << "*";
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
- for (const LiveReg &LR : LiveVirtRegs) {
- Register VirtReg = LR.VirtReg;
- assert(VirtReg.isVirtual() && "Bad map key");
- MCPhysReg PhysReg = LR.PhysReg;
- if (PhysReg != 0) {
- assert(Register::isPhysicalRegister(PhysReg) &&
- "mapped to physreg");
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
- }
- }
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
+ assert(i->VirtReg.isVirtual() && "Bad map key");
+ assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
}
#endif
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
- RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB.begin();
define i32 @fptosi_wh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptosi_wh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzs [[REG:w[0-9]+]], s0
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzs [[REG:w[0-9]+]], s1
; CHECK: mov w0, [[REG]]
%conv = fptosi half %a to i32
ret i32 %conv
define i32 @fptoui_swh(half %a) nounwind ssp {
entry:
; CHECK-LABEL: fptoui_swh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzu [[REG:w[0-9]+]], s0
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzu [[REG:w[0-9]+]], s1
; CHECK: mov w0, [[REG]]
%conv = fptoui half %a to i32
ret i32 %conv
; CHECK: ldrh w8, [sp, #12]
; CHECK: str w8, [sp, #8]
; CHECK: ldr w8, [sp, #8]
-; CHECK: ; kill: def $x8 killed $w8
-; CHECK: str x8, [sp]
+; CHECK: mov x9, x8
+; CHECK: str x9, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
; CHECK: strh w8, [sp, #12]
; CHECK: ldrsh w8, [sp, #12]
; CHECK: str w8, [sp, #8]
-; CHECK: ldrsw x8, [sp, #8]
-; CHECK: str x8, [sp]
+; CHECK: ldrsw x9, [sp, #8]
+; CHECK: str x9, [sp]
; CHECK: ldr x0, [sp]
; CHECK: ret
%a.addr = alloca i8, align 1
; FAST: // %bb.0:
; FAST-NEXT: sub sp, sp, #16 // =16
; FAST-NEXT: .cfi_def_cfa_offset 16
-; FAST-NEXT: fcvt h0, s0
+; FAST-NEXT: fcvt h1, s0
; FAST-NEXT: // implicit-def: $w0
-; FAST-NEXT: fmov s1, w0
-; FAST-NEXT: mov.16b v1, v0
-; FAST-NEXT: fmov w8, s1
+; FAST-NEXT: fmov s0, w0
+; FAST-NEXT: mov.16b v0, v1
+; FAST-NEXT: fmov w8, s0
; FAST-NEXT: mov w0, w8
; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill
; FAST-NEXT: mov w0, w8
; CHECK-LABEL: foo:
; CHECK: sub
; CHECK-DAG: mov x[[SP:[0-9]+]], sp
-; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104
+; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104
+; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]]
; CHECK: strb w0, [x[[SP]], x[[OFFSET]]]
define void @foo(i8 %in) {
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w1, s1
+; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
Entry:
%1 = load i128, i128* %0, align 16
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x9
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: cnt v0.16b, v1.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: cnt v1.16b, v1.16b
+; CHECK-NEXT: uaddlv h2, v1.16b
; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: fmov w11, s1
+; CHECK-NEXT: add w0, w11, w10
; CHECK-NEXT: ret
Entry:
%1 = load i256, i256* %0, align 16
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
-; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: fmov w0, s1
-; CHECK-NEXT: // kill: def $x0 killed $w0
+; CHECK-NEXT: uaddlv h1, v0.16b
+; CHECK-NEXT: // implicit-def: $q0
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: fmov w2, s0
+; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x1, v0.d[1]
; CHECK-NEXT: ret
; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14
; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15
; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16
- ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec
; GCN: renamable $vgpr1 = IMPLICIT_DEF
- ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5)
; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
- ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[84:91]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 0
+; GCN-NEXT: v_writelane_b32 v0, s5, 1
+; GCN-NEXT: v_writelane_b32 v0, s6, 2
+; GCN-NEXT: v_writelane_b32 v0, s7, 3
+; GCN-NEXT: v_writelane_b32 v0, s8, 4
+; GCN-NEXT: v_writelane_b32 v0, s9, 5
+; GCN-NEXT: v_writelane_b32 v0, s10, 6
+; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 8
+; GCN-NEXT: v_writelane_b32 v0, s5, 9
+; GCN-NEXT: v_writelane_b32 v0, s6, 10
+; GCN-NEXT: v_writelane_b32 v0, s7, 11
+; GCN-NEXT: v_writelane_b32 v0, s8, 12
+; GCN-NEXT: v_writelane_b32 v0, s9, 13
+; GCN-NEXT: v_writelane_b32 v0, s10, 14
+; GCN-NEXT: v_writelane_b32 v0, s11, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 16
+; GCN-NEXT: v_writelane_b32 v0, s5, 17
+; GCN-NEXT: v_writelane_b32 v0, s6, 18
+; GCN-NEXT: v_writelane_b32 v0, s7, 19
+; GCN-NEXT: v_writelane_b32 v0, s8, 20
+; GCN-NEXT: v_writelane_b32 v0, s9, 21
+; GCN-NEXT: v_writelane_b32 v0, s10, 22
+; GCN-NEXT: v_writelane_b32 v0, s11, 23
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 24
+; GCN-NEXT: v_writelane_b32 v0, s5, 25
+; GCN-NEXT: v_writelane_b32 v0, s6, 26
+; GCN-NEXT: v_writelane_b32 v0, s7, 27
+; GCN-NEXT: v_writelane_b32 v0, s8, 28
+; GCN-NEXT: v_writelane_b32 v0, s9, 29
+; GCN-NEXT: v_writelane_b32 v0, s10, 30
+; GCN-NEXT: v_writelane_b32 v0, s11, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 32
+; GCN-NEXT: v_writelane_b32 v0, s5, 33
+; GCN-NEXT: v_writelane_b32 v0, s6, 34
+; GCN-NEXT: v_writelane_b32 v0, s7, 35
+; GCN-NEXT: v_writelane_b32 v0, s8, 36
+; GCN-NEXT: v_writelane_b32 v0, s9, 37
+; GCN-NEXT: v_writelane_b32 v0, s10, 38
+; GCN-NEXT: v_writelane_b32 v0, s11, 39
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 40
+; GCN-NEXT: v_writelane_b32 v0, s5, 41
+; GCN-NEXT: v_writelane_b32 v0, s6, 42
+; GCN-NEXT: v_writelane_b32 v0, s7, 43
+; GCN-NEXT: v_writelane_b32 v0, s8, 44
+; GCN-NEXT: v_writelane_b32 v0, s9, 45
+; GCN-NEXT: v_writelane_b32 v0, s10, 46
+; GCN-NEXT: v_writelane_b32 v0, s11, 47
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 48
+; GCN-NEXT: v_writelane_b32 v0, s5, 49
+; GCN-NEXT: v_writelane_b32 v0, s6, 50
+; GCN-NEXT: v_writelane_b32 v0, s7, 51
+; GCN-NEXT: v_writelane_b32 v0, s8, 52
+; GCN-NEXT: v_writelane_b32 v0, s9, 53
+; GCN-NEXT: v_writelane_b32 v0, s10, 54
+; GCN-NEXT: v_writelane_b32 v0, s11, 55
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:11]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-NEXT: v_writelane_b32 v0, s4, 1
-; GCN-NEXT: v_writelane_b32 v0, s5, 2
-; GCN-NEXT: v_writelane_b32 v0, s6, 3
-; GCN-NEXT: v_writelane_b32 v0, s7, 4
-; GCN-NEXT: v_writelane_b32 v0, s8, 5
-; GCN-NEXT: v_writelane_b32 v0, s9, 6
-; GCN-NEXT: v_writelane_b32 v0, s10, 7
-; GCN-NEXT: v_writelane_b32 v0, s11, 8
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 9
-; GCN-NEXT: v_writelane_b32 v0, s1, 10
-; GCN-NEXT: v_writelane_b32 v0, s2, 11
-; GCN-NEXT: v_writelane_b32 v0, s3, 12
-; GCN-NEXT: v_writelane_b32 v0, s4, 13
-; GCN-NEXT: v_writelane_b32 v0, s5, 14
-; GCN-NEXT: v_writelane_b32 v0, s6, 15
-; GCN-NEXT: v_writelane_b32 v0, s7, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 17
-; GCN-NEXT: v_writelane_b32 v0, s1, 18
-; GCN-NEXT: v_writelane_b32 v0, s2, 19
-; GCN-NEXT: v_writelane_b32 v0, s3, 20
-; GCN-NEXT: v_writelane_b32 v0, s4, 21
-; GCN-NEXT: v_writelane_b32 v0, s5, 22
-; GCN-NEXT: v_writelane_b32 v0, s6, 23
-; GCN-NEXT: v_writelane_b32 v0, s7, 24
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 25
-; GCN-NEXT: v_writelane_b32 v0, s1, 26
-; GCN-NEXT: v_writelane_b32 v0, s2, 27
-; GCN-NEXT: v_writelane_b32 v0, s3, 28
-; GCN-NEXT: v_writelane_b32 v0, s4, 29
-; GCN-NEXT: v_writelane_b32 v0, s5, 30
-; GCN-NEXT: v_writelane_b32 v0, s6, 31
-; GCN-NEXT: v_writelane_b32 v0, s7, 32
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 33
-; GCN-NEXT: v_writelane_b32 v0, s1, 34
-; GCN-NEXT: v_writelane_b32 v0, s2, 35
-; GCN-NEXT: v_writelane_b32 v0, s3, 36
-; GCN-NEXT: v_writelane_b32 v0, s4, 37
-; GCN-NEXT: v_writelane_b32 v0, s5, 38
-; GCN-NEXT: v_writelane_b32 v0, s6, 39
-; GCN-NEXT: v_writelane_b32 v0, s7, 40
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 41
-; GCN-NEXT: v_writelane_b32 v0, s1, 42
-; GCN-NEXT: v_writelane_b32 v0, s2, 43
-; GCN-NEXT: v_writelane_b32 v0, s3, 44
-; GCN-NEXT: v_writelane_b32 v0, s4, 45
-; GCN-NEXT: v_writelane_b32 v0, s5, 46
-; GCN-NEXT: v_writelane_b32 v0, s6, 47
-; GCN-NEXT: v_writelane_b32 v0, s7, 48
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 49
-; GCN-NEXT: v_writelane_b32 v0, s1, 50
-; GCN-NEXT: v_writelane_b32 v0, s2, 51
-; GCN-NEXT: v_writelane_b32 v0, s3, 52
-; GCN-NEXT: v_writelane_b32 v0, s4, 53
-; GCN-NEXT: v_writelane_b32 v0, s5, 54
-; GCN-NEXT: v_writelane_b32 v0, s6, 55
-; GCN-NEXT: v_writelane_b32 v0, s7, 56
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s8, 0
-; GCN-NEXT: v_readlane_b32 s9, v0, 0
-; GCN-NEXT: s_cmp_lg_u32 s9, s8
-; GCN-NEXT: v_writelane_b32 v0, s12, 57
-; GCN-NEXT: v_writelane_b32 v0, s13, 58
-; GCN-NEXT: v_writelane_b32 v0, s14, 59
-; GCN-NEXT: v_writelane_b32 v0, s15, 60
-; GCN-NEXT: v_writelane_b32 v0, s16, 61
-; GCN-NEXT: v_writelane_b32 v0, s17, 62
-; GCN-NEXT: v_writelane_b32 v0, s18, 63
-; GCN-NEXT: v_writelane_b32 v1, s19, 0
-; GCN-NEXT: v_writelane_b32 v1, s20, 1
-; GCN-NEXT: v_writelane_b32 v1, s21, 2
-; GCN-NEXT: v_writelane_b32 v1, s22, 3
-; GCN-NEXT: v_writelane_b32 v1, s23, 4
-; GCN-NEXT: v_writelane_b32 v1, s24, 5
-; GCN-NEXT: v_writelane_b32 v1, s25, 6
-; GCN-NEXT: v_writelane_b32 v1, s26, 7
-; GCN-NEXT: v_writelane_b32 v1, s27, 8
-; GCN-NEXT: v_writelane_b32 v1, s36, 9
-; GCN-NEXT: v_writelane_b32 v1, s37, 10
-; GCN-NEXT: v_writelane_b32 v1, s38, 11
-; GCN-NEXT: v_writelane_b32 v1, s39, 12
-; GCN-NEXT: v_writelane_b32 v1, s40, 13
-; GCN-NEXT: v_writelane_b32 v1, s41, 14
-; GCN-NEXT: v_writelane_b32 v1, s42, 15
-; GCN-NEXT: v_writelane_b32 v1, s43, 16
-; GCN-NEXT: v_writelane_b32 v1, s44, 17
-; GCN-NEXT: v_writelane_b32 v1, s45, 18
-; GCN-NEXT: v_writelane_b32 v1, s46, 19
-; GCN-NEXT: v_writelane_b32 v1, s47, 20
-; GCN-NEXT: v_writelane_b32 v1, s48, 21
-; GCN-NEXT: v_writelane_b32 v1, s49, 22
-; GCN-NEXT: v_writelane_b32 v1, s50, 23
-; GCN-NEXT: v_writelane_b32 v1, s51, 24
-; GCN-NEXT: v_writelane_b32 v1, s52, 25
-; GCN-NEXT: v_writelane_b32 v1, s53, 26
-; GCN-NEXT: v_writelane_b32 v1, s54, 27
-; GCN-NEXT: v_writelane_b32 v1, s55, 28
-; GCN-NEXT: v_writelane_b32 v1, s56, 29
-; GCN-NEXT: v_writelane_b32 v1, s57, 30
-; GCN-NEXT: v_writelane_b32 v1, s58, 31
-; GCN-NEXT: v_writelane_b32 v1, s59, 32
-; GCN-NEXT: v_writelane_b32 v1, s60, 33
-; GCN-NEXT: v_writelane_b32 v1, s61, 34
-; GCN-NEXT: v_writelane_b32 v1, s62, 35
-; GCN-NEXT: v_writelane_b32 v1, s63, 36
-; GCN-NEXT: v_writelane_b32 v1, s64, 37
-; GCN-NEXT: v_writelane_b32 v1, s65, 38
-; GCN-NEXT: v_writelane_b32 v1, s66, 39
-; GCN-NEXT: v_writelane_b32 v1, s67, 40
-; GCN-NEXT: v_writelane_b32 v1, s68, 41
-; GCN-NEXT: v_writelane_b32 v1, s69, 42
-; GCN-NEXT: v_writelane_b32 v1, s70, 43
-; GCN-NEXT: v_writelane_b32 v1, s71, 44
-; GCN-NEXT: v_writelane_b32 v1, s72, 45
-; GCN-NEXT: v_writelane_b32 v1, s73, 46
-; GCN-NEXT: v_writelane_b32 v1, s74, 47
-; GCN-NEXT: v_writelane_b32 v1, s75, 48
-; GCN-NEXT: v_writelane_b32 v1, s76, 49
-; GCN-NEXT: v_writelane_b32 v1, s77, 50
-; GCN-NEXT: v_writelane_b32 v1, s78, 51
-; GCN-NEXT: v_writelane_b32 v1, s79, 52
-; GCN-NEXT: v_writelane_b32 v1, s80, 53
-; GCN-NEXT: v_writelane_b32 v1, s81, 54
-; GCN-NEXT: v_writelane_b32 v1, s82, 55
-; GCN-NEXT: v_writelane_b32 v1, s83, 56
-; GCN-NEXT: v_writelane_b32 v1, s84, 57
-; GCN-NEXT: v_writelane_b32 v1, s85, 58
-; GCN-NEXT: v_writelane_b32 v1, s86, 59
-; GCN-NEXT: v_writelane_b32 v1, s87, 60
-; GCN-NEXT: v_writelane_b32 v1, s88, 61
-; GCN-NEXT: v_writelane_b32 v1, s89, 62
-; GCN-NEXT: v_writelane_b32 v1, s90, 63
-; GCN-NEXT: v_writelane_b32 v2, s91, 0
-; GCN-NEXT: v_writelane_b32 v2, s0, 1
-; GCN-NEXT: v_writelane_b32 v2, s1, 2
-; GCN-NEXT: v_writelane_b32 v2, s2, 3
-; GCN-NEXT: v_writelane_b32 v2, s3, 4
-; GCN-NEXT: v_writelane_b32 v2, s4, 5
-; GCN-NEXT: v_writelane_b32 v2, s5, 6
-; GCN-NEXT: v_writelane_b32 v2, s6, 7
-; GCN-NEXT: v_writelane_b32 v2, s7, 8
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v0, s12, 56
+; GCN-NEXT: v_writelane_b32 v0, s13, 57
+; GCN-NEXT: v_writelane_b32 v0, s14, 58
+; GCN-NEXT: v_writelane_b32 v0, s15, 59
+; GCN-NEXT: v_writelane_b32 v0, s16, 60
+; GCN-NEXT: v_writelane_b32 v0, s17, 61
+; GCN-NEXT: v_writelane_b32 v0, s18, 62
+; GCN-NEXT: v_writelane_b32 v0, s19, 63
+; GCN-NEXT: v_writelane_b32 v1, s20, 0
+; GCN-NEXT: v_writelane_b32 v1, s21, 1
+; GCN-NEXT: v_writelane_b32 v1, s22, 2
+; GCN-NEXT: v_writelane_b32 v1, s23, 3
+; GCN-NEXT: v_writelane_b32 v1, s24, 4
+; GCN-NEXT: v_writelane_b32 v1, s25, 5
+; GCN-NEXT: v_writelane_b32 v1, s26, 6
+; GCN-NEXT: v_writelane_b32 v1, s27, 7
+; GCN-NEXT: v_writelane_b32 v1, s36, 8
+; GCN-NEXT: v_writelane_b32 v1, s37, 9
+; GCN-NEXT: v_writelane_b32 v1, s38, 10
+; GCN-NEXT: v_writelane_b32 v1, s39, 11
+; GCN-NEXT: v_writelane_b32 v1, s40, 12
+; GCN-NEXT: v_writelane_b32 v1, s41, 13
+; GCN-NEXT: v_writelane_b32 v1, s42, 14
+; GCN-NEXT: v_writelane_b32 v1, s43, 15
+; GCN-NEXT: v_writelane_b32 v1, s44, 16
+; GCN-NEXT: v_writelane_b32 v1, s45, 17
+; GCN-NEXT: v_writelane_b32 v1, s46, 18
+; GCN-NEXT: v_writelane_b32 v1, s47, 19
+; GCN-NEXT: v_writelane_b32 v1, s48, 20
+; GCN-NEXT: v_writelane_b32 v1, s49, 21
+; GCN-NEXT: v_writelane_b32 v1, s50, 22
+; GCN-NEXT: v_writelane_b32 v1, s51, 23
+; GCN-NEXT: v_writelane_b32 v1, s52, 24
+; GCN-NEXT: v_writelane_b32 v1, s53, 25
+; GCN-NEXT: v_writelane_b32 v1, s54, 26
+; GCN-NEXT: v_writelane_b32 v1, s55, 27
+; GCN-NEXT: v_writelane_b32 v1, s56, 28
+; GCN-NEXT: v_writelane_b32 v1, s57, 29
+; GCN-NEXT: v_writelane_b32 v1, s58, 30
+; GCN-NEXT: v_writelane_b32 v1, s59, 31
+; GCN-NEXT: v_writelane_b32 v1, s60, 32
+; GCN-NEXT: v_writelane_b32 v1, s61, 33
+; GCN-NEXT: v_writelane_b32 v1, s62, 34
+; GCN-NEXT: v_writelane_b32 v1, s63, 35
+; GCN-NEXT: v_writelane_b32 v1, s64, 36
+; GCN-NEXT: v_writelane_b32 v1, s65, 37
+; GCN-NEXT: v_writelane_b32 v1, s66, 38
+; GCN-NEXT: v_writelane_b32 v1, s67, 39
+; GCN-NEXT: v_writelane_b32 v1, s68, 40
+; GCN-NEXT: v_writelane_b32 v1, s69, 41
+; GCN-NEXT: v_writelane_b32 v1, s70, 42
+; GCN-NEXT: v_writelane_b32 v1, s71, 43
+; GCN-NEXT: v_writelane_b32 v1, s72, 44
+; GCN-NEXT: v_writelane_b32 v1, s73, 45
+; GCN-NEXT: v_writelane_b32 v1, s74, 46
+; GCN-NEXT: v_writelane_b32 v1, s75, 47
+; GCN-NEXT: v_writelane_b32 v1, s76, 48
+; GCN-NEXT: v_writelane_b32 v1, s77, 49
+; GCN-NEXT: v_writelane_b32 v1, s78, 50
+; GCN-NEXT: v_writelane_b32 v1, s79, 51
+; GCN-NEXT: v_writelane_b32 v1, s80, 52
+; GCN-NEXT: v_writelane_b32 v1, s81, 53
+; GCN-NEXT: v_writelane_b32 v1, s82, 54
+; GCN-NEXT: v_writelane_b32 v1, s83, 55
+; GCN-NEXT: v_writelane_b32 v1, s84, 56
+; GCN-NEXT: v_writelane_b32 v1, s85, 57
+; GCN-NEXT: v_writelane_b32 v1, s86, 58
+; GCN-NEXT: v_writelane_b32 v1, s87, 59
+; GCN-NEXT: v_writelane_b32 v1, s88, 60
+; GCN-NEXT: v_writelane_b32 v1, s89, 61
+; GCN-NEXT: v_writelane_b32 v1, s90, 62
+; GCN-NEXT: v_writelane_b32 v1, s91, 63
+; GCN-NEXT: v_writelane_b32 v2, s4, 0
+; GCN-NEXT: v_writelane_b32 v2, s5, 1
+; GCN-NEXT: v_writelane_b32 v2, s6, 2
+; GCN-NEXT: v_writelane_b32 v2, s7, 3
+; GCN-NEXT: v_writelane_b32 v2, s8, 4
+; GCN-NEXT: v_writelane_b32 v2, s9, 5
+; GCN-NEXT: v_writelane_b32 v2, s10, 6
+; GCN-NEXT: v_writelane_b32 v2, s11, 7
; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v0, 1
-; GCN-NEXT: v_readlane_b32 s1, v0, 2
-; GCN-NEXT: v_readlane_b32 s2, v0, 3
-; GCN-NEXT: v_readlane_b32 s3, v0, 4
-; GCN-NEXT: v_readlane_b32 s4, v0, 5
-; GCN-NEXT: v_readlane_b32 s5, v0, 6
-; GCN-NEXT: v_readlane_b32 s6, v0, 7
-; GCN-NEXT: v_readlane_b32 s7, v0, 8
+; GCN-NEXT: v_readlane_b32 s0, v0, 0
+; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s2, v0, 2
+; GCN-NEXT: v_readlane_b32 s3, v0, 3
+; GCN-NEXT: v_readlane_b32 s4, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v0, 5
+; GCN-NEXT: v_readlane_b32 s6, v0, 6
+; GCN-NEXT: v_readlane_b32 s7, v0, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 57
-; GCN-NEXT: v_readlane_b32 s1, v0, 58
-; GCN-NEXT: v_readlane_b32 s2, v0, 59
-; GCN-NEXT: v_readlane_b32 s3, v0, 60
-; GCN-NEXT: v_readlane_b32 s4, v0, 61
-; GCN-NEXT: v_readlane_b32 s5, v0, 62
-; GCN-NEXT: v_readlane_b32 s6, v0, 63
-; GCN-NEXT: v_readlane_b32 s7, v1, 0
+; GCN-NEXT: v_readlane_b32 s0, v0, 56
+; GCN-NEXT: v_readlane_b32 s1, v0, 57
+; GCN-NEXT: v_readlane_b32 s2, v0, 58
+; GCN-NEXT: v_readlane_b32 s3, v0, 59
+; GCN-NEXT: v_readlane_b32 s4, v0, 60
+; GCN-NEXT: v_readlane_b32 s5, v0, 61
+; GCN-NEXT: v_readlane_b32 s6, v0, 62
+; GCN-NEXT: v_readlane_b32 s7, v0, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 1
-; GCN-NEXT: v_readlane_b32 s1, v1, 2
-; GCN-NEXT: v_readlane_b32 s2, v1, 3
-; GCN-NEXT: v_readlane_b32 s3, v1, 4
-; GCN-NEXT: v_readlane_b32 s4, v1, 5
-; GCN-NEXT: v_readlane_b32 s5, v1, 6
-; GCN-NEXT: v_readlane_b32 s6, v1, 7
-; GCN-NEXT: v_readlane_b32 s7, v1, 8
+; GCN-NEXT: v_readlane_b32 s0, v1, 0
+; GCN-NEXT: v_readlane_b32 s1, v1, 1
+; GCN-NEXT: v_readlane_b32 s2, v1, 2
+; GCN-NEXT: v_readlane_b32 s3, v1, 3
+; GCN-NEXT: v_readlane_b32 s4, v1, 4
+; GCN-NEXT: v_readlane_b32 s5, v1, 5
+; GCN-NEXT: v_readlane_b32 s6, v1, 6
+; GCN-NEXT: v_readlane_b32 s7, v1, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 9
-; GCN-NEXT: v_readlane_b32 s1, v1, 10
-; GCN-NEXT: v_readlane_b32 s2, v1, 11
-; GCN-NEXT: v_readlane_b32 s3, v1, 12
-; GCN-NEXT: v_readlane_b32 s4, v1, 13
-; GCN-NEXT: v_readlane_b32 s5, v1, 14
-; GCN-NEXT: v_readlane_b32 s6, v1, 15
-; GCN-NEXT: v_readlane_b32 s7, v1, 16
+; GCN-NEXT: v_readlane_b32 s0, v1, 8
+; GCN-NEXT: v_readlane_b32 s1, v1, 9
+; GCN-NEXT: v_readlane_b32 s2, v1, 10
+; GCN-NEXT: v_readlane_b32 s3, v1, 11
+; GCN-NEXT: v_readlane_b32 s4, v1, 12
+; GCN-NEXT: v_readlane_b32 s5, v1, 13
+; GCN-NEXT: v_readlane_b32 s6, v1, 14
+; GCN-NEXT: v_readlane_b32 s7, v1, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 17
-; GCN-NEXT: v_readlane_b32 s1, v1, 18
-; GCN-NEXT: v_readlane_b32 s2, v1, 19
-; GCN-NEXT: v_readlane_b32 s3, v1, 20
-; GCN-NEXT: v_readlane_b32 s4, v1, 21
-; GCN-NEXT: v_readlane_b32 s5, v1, 22
-; GCN-NEXT: v_readlane_b32 s6, v1, 23
-; GCN-NEXT: v_readlane_b32 s7, v1, 24
+; GCN-NEXT: v_readlane_b32 s0, v1, 16
+; GCN-NEXT: v_readlane_b32 s1, v1, 17
+; GCN-NEXT: v_readlane_b32 s2, v1, 18
+; GCN-NEXT: v_readlane_b32 s3, v1, 19
+; GCN-NEXT: v_readlane_b32 s4, v1, 20
+; GCN-NEXT: v_readlane_b32 s5, v1, 21
+; GCN-NEXT: v_readlane_b32 s6, v1, 22
+; GCN-NEXT: v_readlane_b32 s7, v1, 23
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 25
-; GCN-NEXT: v_readlane_b32 s1, v1, 26
-; GCN-NEXT: v_readlane_b32 s2, v1, 27
-; GCN-NEXT: v_readlane_b32 s3, v1, 28
-; GCN-NEXT: v_readlane_b32 s4, v1, 29
-; GCN-NEXT: v_readlane_b32 s5, v1, 30
-; GCN-NEXT: v_readlane_b32 s6, v1, 31
-; GCN-NEXT: v_readlane_b32 s7, v1, 32
+; GCN-NEXT: v_readlane_b32 s0, v1, 24
+; GCN-NEXT: v_readlane_b32 s1, v1, 25
+; GCN-NEXT: v_readlane_b32 s2, v1, 26
+; GCN-NEXT: v_readlane_b32 s3, v1, 27
+; GCN-NEXT: v_readlane_b32 s4, v1, 28
+; GCN-NEXT: v_readlane_b32 s5, v1, 29
+; GCN-NEXT: v_readlane_b32 s6, v1, 30
+; GCN-NEXT: v_readlane_b32 s7, v1, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 33
-; GCN-NEXT: v_readlane_b32 s1, v1, 34
-; GCN-NEXT: v_readlane_b32 s2, v1, 35
-; GCN-NEXT: v_readlane_b32 s3, v1, 36
-; GCN-NEXT: v_readlane_b32 s4, v1, 37
-; GCN-NEXT: v_readlane_b32 s5, v1, 38
-; GCN-NEXT: v_readlane_b32 s6, v1, 39
-; GCN-NEXT: v_readlane_b32 s7, v1, 40
+; GCN-NEXT: v_readlane_b32 s0, v1, 32
+; GCN-NEXT: v_readlane_b32 s1, v1, 33
+; GCN-NEXT: v_readlane_b32 s2, v1, 34
+; GCN-NEXT: v_readlane_b32 s3, v1, 35
+; GCN-NEXT: v_readlane_b32 s4, v1, 36
+; GCN-NEXT: v_readlane_b32 s5, v1, 37
+; GCN-NEXT: v_readlane_b32 s6, v1, 38
+; GCN-NEXT: v_readlane_b32 s7, v1, 39
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 41
-; GCN-NEXT: v_readlane_b32 s1, v1, 42
-; GCN-NEXT: v_readlane_b32 s2, v1, 43
-; GCN-NEXT: v_readlane_b32 s3, v1, 44
-; GCN-NEXT: v_readlane_b32 s4, v1, 45
-; GCN-NEXT: v_readlane_b32 s5, v1, 46
-; GCN-NEXT: v_readlane_b32 s6, v1, 47
-; GCN-NEXT: v_readlane_b32 s7, v1, 48
+; GCN-NEXT: v_readlane_b32 s0, v1, 40
+; GCN-NEXT: v_readlane_b32 s1, v1, 41
+; GCN-NEXT: v_readlane_b32 s2, v1, 42
+; GCN-NEXT: v_readlane_b32 s3, v1, 43
+; GCN-NEXT: v_readlane_b32 s4, v1, 44
+; GCN-NEXT: v_readlane_b32 s5, v1, 45
+; GCN-NEXT: v_readlane_b32 s6, v1, 46
+; GCN-NEXT: v_readlane_b32 s7, v1, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 49
-; GCN-NEXT: v_readlane_b32 s1, v1, 50
-; GCN-NEXT: v_readlane_b32 s2, v1, 51
-; GCN-NEXT: v_readlane_b32 s3, v1, 52
-; GCN-NEXT: v_readlane_b32 s4, v1, 53
-; GCN-NEXT: v_readlane_b32 s5, v1, 54
-; GCN-NEXT: v_readlane_b32 s6, v1, 55
-; GCN-NEXT: v_readlane_b32 s7, v1, 56
+; GCN-NEXT: v_readlane_b32 s0, v1, 48
+; GCN-NEXT: v_readlane_b32 s1, v1, 49
+; GCN-NEXT: v_readlane_b32 s2, v1, 50
+; GCN-NEXT: v_readlane_b32 s3, v1, 51
+; GCN-NEXT: v_readlane_b32 s4, v1, 52
+; GCN-NEXT: v_readlane_b32 s5, v1, 53
+; GCN-NEXT: v_readlane_b32 s6, v1, 54
+; GCN-NEXT: v_readlane_b32 s7, v1, 55
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 57
-; GCN-NEXT: v_readlane_b32 s1, v1, 58
-; GCN-NEXT: v_readlane_b32 s2, v1, 59
-; GCN-NEXT: v_readlane_b32 s3, v1, 60
-; GCN-NEXT: v_readlane_b32 s4, v1, 61
-; GCN-NEXT: v_readlane_b32 s5, v1, 62
-; GCN-NEXT: v_readlane_b32 s6, v1, 63
-; GCN-NEXT: v_readlane_b32 s7, v2, 0
+; GCN-NEXT: v_readlane_b32 s0, v1, 56
+; GCN-NEXT: v_readlane_b32 s1, v1, 57
+; GCN-NEXT: v_readlane_b32 s2, v1, 58
+; GCN-NEXT: v_readlane_b32 s3, v1, 59
+; GCN-NEXT: v_readlane_b32 s4, v1, 60
+; GCN-NEXT: v_readlane_b32 s5, v1, 61
+; GCN-NEXT: v_readlane_b32 s6, v1, 62
+; GCN-NEXT: v_readlane_b32 s7, v1, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 9
-; GCN-NEXT: v_readlane_b32 s1, v0, 10
-; GCN-NEXT: v_readlane_b32 s2, v0, 11
-; GCN-NEXT: v_readlane_b32 s3, v0, 12
-; GCN-NEXT: v_readlane_b32 s4, v0, 13
-; GCN-NEXT: v_readlane_b32 s5, v0, 14
-; GCN-NEXT: v_readlane_b32 s6, v0, 15
-; GCN-NEXT: v_readlane_b32 s7, v0, 16
+; GCN-NEXT: v_readlane_b32 s0, v0, 8
+; GCN-NEXT: v_readlane_b32 s1, v0, 9
+; GCN-NEXT: v_readlane_b32 s2, v0, 10
+; GCN-NEXT: v_readlane_b32 s3, v0, 11
+; GCN-NEXT: v_readlane_b32 s4, v0, 12
+; GCN-NEXT: v_readlane_b32 s5, v0, 13
+; GCN-NEXT: v_readlane_b32 s6, v0, 14
+; GCN-NEXT: v_readlane_b32 s7, v0, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 17
-; GCN-NEXT: v_readlane_b32 s1, v0, 18
-; GCN-NEXT: v_readlane_b32 s2, v0, 19
-; GCN-NEXT: v_readlane_b32 s3, v0, 20
-; GCN-NEXT: v_readlane_b32 s4, v0, 21
-; GCN-NEXT: v_readlane_b32 s5, v0, 22
-; GCN-NEXT: v_readlane_b32 s6, v0, 23
-; GCN-NEXT: v_readlane_b32 s7, v0, 24
+; GCN-NEXT: v_readlane_b32 s0, v0, 16
+; GCN-NEXT: v_readlane_b32 s1, v0, 17
+; GCN-NEXT: v_readlane_b32 s2, v0, 18
+; GCN-NEXT: v_readlane_b32 s3, v0, 19
+; GCN-NEXT: v_readlane_b32 s4, v0, 20
+; GCN-NEXT: v_readlane_b32 s5, v0, 21
+; GCN-NEXT: v_readlane_b32 s6, v0, 22
+; GCN-NEXT: v_readlane_b32 s7, v0, 23
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 25
-; GCN-NEXT: v_readlane_b32 s1, v0, 26
-; GCN-NEXT: v_readlane_b32 s2, v0, 27
-; GCN-NEXT: v_readlane_b32 s3, v0, 28
-; GCN-NEXT: v_readlane_b32 s4, v0, 29
-; GCN-NEXT: v_readlane_b32 s5, v0, 30
-; GCN-NEXT: v_readlane_b32 s6, v0, 31
-; GCN-NEXT: v_readlane_b32 s7, v0, 32
+; GCN-NEXT: v_readlane_b32 s0, v0, 24
+; GCN-NEXT: v_readlane_b32 s1, v0, 25
+; GCN-NEXT: v_readlane_b32 s2, v0, 26
+; GCN-NEXT: v_readlane_b32 s3, v0, 27
+; GCN-NEXT: v_readlane_b32 s4, v0, 28
+; GCN-NEXT: v_readlane_b32 s5, v0, 29
+; GCN-NEXT: v_readlane_b32 s6, v0, 30
+; GCN-NEXT: v_readlane_b32 s7, v0, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 33
-; GCN-NEXT: v_readlane_b32 s1, v0, 34
-; GCN-NEXT: v_readlane_b32 s2, v0, 35
-; GCN-NEXT: v_readlane_b32 s3, v0, 36
-; GCN-NEXT: v_readlane_b32 s4, v0, 37
-; GCN-NEXT: v_readlane_b32 s5, v0, 38
-; GCN-NEXT: v_readlane_b32 s6, v0, 39
-; GCN-NEXT: v_readlane_b32 s7, v0, 40
+; GCN-NEXT: v_readlane_b32 s0, v0, 32
+; GCN-NEXT: v_readlane_b32 s1, v0, 33
+; GCN-NEXT: v_readlane_b32 s2, v0, 34
+; GCN-NEXT: v_readlane_b32 s3, v0, 35
+; GCN-NEXT: v_readlane_b32 s4, v0, 36
+; GCN-NEXT: v_readlane_b32 s5, v0, 37
+; GCN-NEXT: v_readlane_b32 s6, v0, 38
+; GCN-NEXT: v_readlane_b32 s7, v0, 39
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 41
-; GCN-NEXT: v_readlane_b32 s1, v0, 42
-; GCN-NEXT: v_readlane_b32 s2, v0, 43
-; GCN-NEXT: v_readlane_b32 s3, v0, 44
-; GCN-NEXT: v_readlane_b32 s4, v0, 45
-; GCN-NEXT: v_readlane_b32 s5, v0, 46
-; GCN-NEXT: v_readlane_b32 s6, v0, 47
-; GCN-NEXT: v_readlane_b32 s7, v0, 48
+; GCN-NEXT: v_readlane_b32 s0, v0, 40
+; GCN-NEXT: v_readlane_b32 s1, v0, 41
+; GCN-NEXT: v_readlane_b32 s2, v0, 42
+; GCN-NEXT: v_readlane_b32 s3, v0, 43
+; GCN-NEXT: v_readlane_b32 s4, v0, 44
+; GCN-NEXT: v_readlane_b32 s5, v0, 45
+; GCN-NEXT: v_readlane_b32 s6, v0, 46
+; GCN-NEXT: v_readlane_b32 s7, v0, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 49
-; GCN-NEXT: v_readlane_b32 s1, v0, 50
-; GCN-NEXT: v_readlane_b32 s2, v0, 51
-; GCN-NEXT: v_readlane_b32 s3, v0, 52
-; GCN-NEXT: v_readlane_b32 s4, v0, 53
-; GCN-NEXT: v_readlane_b32 s5, v0, 54
-; GCN-NEXT: v_readlane_b32 s6, v0, 55
-; GCN-NEXT: v_readlane_b32 s7, v0, 56
+; GCN-NEXT: v_readlane_b32 s0, v0, 48
+; GCN-NEXT: v_readlane_b32 s1, v0, 49
+; GCN-NEXT: v_readlane_b32 s2, v0, 50
+; GCN-NEXT: v_readlane_b32 s3, v0, 51
+; GCN-NEXT: v_readlane_b32 s4, v0, 52
+; GCN-NEXT: v_readlane_b32 s5, v0, 53
+; GCN-NEXT: v_readlane_b32 s6, v0, 54
+; GCN-NEXT: v_readlane_b32 s7, v0, 55
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v2, 1
-; GCN-NEXT: v_readlane_b32 s1, v2, 2
-; GCN-NEXT: v_readlane_b32 s2, v2, 3
-; GCN-NEXT: v_readlane_b32 s3, v2, 4
-; GCN-NEXT: v_readlane_b32 s4, v2, 5
-; GCN-NEXT: v_readlane_b32 s5, v2, 6
-; GCN-NEXT: v_readlane_b32 s6, v2, 7
-; GCN-NEXT: v_readlane_b32 s7, v2, 8
+; GCN-NEXT: v_readlane_b32 s0, v2, 0
+; GCN-NEXT: v_readlane_b32 s1, v2, 1
+; GCN-NEXT: v_readlane_b32 s2, v2, 2
+; GCN-NEXT: v_readlane_b32 s3, v2, 3
+; GCN-NEXT: v_readlane_b32 s4, v2, 4
+; GCN-NEXT: v_readlane_b32 s5, v2, 5
+; GCN-NEXT: v_readlane_b32 s6, v2, 6
+; GCN-NEXT: v_readlane_b32 s7, v2, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: split_sgpr_spill_2_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 0
+; GCN-NEXT: v_writelane_b32 v0, s5, 1
+; GCN-NEXT: v_writelane_b32 v0, s6, 2
+; GCN-NEXT: v_writelane_b32 v0, s7, 3
+; GCN-NEXT: v_writelane_b32 v0, s8, 4
+; GCN-NEXT: v_writelane_b32 v0, s9, 5
+; GCN-NEXT: v_writelane_b32 v0, s10, 6
+; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: v_writelane_b32 v0, s12, 8
+; GCN-NEXT: v_writelane_b32 v0, s13, 9
+; GCN-NEXT: v_writelane_b32 v0, s14, 10
+; GCN-NEXT: v_writelane_b32 v0, s15, 11
+; GCN-NEXT: v_writelane_b32 v0, s16, 12
+; GCN-NEXT: v_writelane_b32 v0, s17, 13
+; GCN-NEXT: v_writelane_b32 v0, s18, 14
+; GCN-NEXT: v_writelane_b32 v0, s19, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v0, s4, 16
+; GCN-NEXT: v_writelane_b32 v0, s5, 17
+; GCN-NEXT: v_writelane_b32 v0, s6, 18
+; GCN-NEXT: v_writelane_b32 v0, s7, 19
+; GCN-NEXT: v_writelane_b32 v0, s8, 20
+; GCN-NEXT: v_writelane_b32 v0, s9, 21
+; GCN-NEXT: v_writelane_b32 v0, s10, 22
+; GCN-NEXT: v_writelane_b32 v0, s11, 23
+; GCN-NEXT: v_writelane_b32 v0, s12, 24
+; GCN-NEXT: v_writelane_b32 v0, s13, 25
+; GCN-NEXT: v_writelane_b32 v0, s14, 26
+; GCN-NEXT: v_writelane_b32 v0, s15, 27
+; GCN-NEXT: v_writelane_b32 v0, s16, 28
+; GCN-NEXT: v_writelane_b32 v0, s17, 29
+; GCN-NEXT: v_writelane_b32 v0, s18, 30
+; GCN-NEXT: v_writelane_b32 v0, s19, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[20:27]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:1]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-NEXT: v_writelane_b32 v0, s4, 1
-; GCN-NEXT: v_writelane_b32 v0, s5, 2
-; GCN-NEXT: v_writelane_b32 v0, s6, 3
-; GCN-NEXT: v_writelane_b32 v0, s7, 4
-; GCN-NEXT: v_writelane_b32 v0, s8, 5
-; GCN-NEXT: v_writelane_b32 v0, s9, 6
-; GCN-NEXT: v_writelane_b32 v0, s10, 7
-; GCN-NEXT: v_writelane_b32 v0, s11, 8
-; GCN-NEXT: v_writelane_b32 v0, s12, 9
-; GCN-NEXT: v_writelane_b32 v0, s13, 10
-; GCN-NEXT: v_writelane_b32 v0, s14, 11
-; GCN-NEXT: v_writelane_b32 v0, s15, 12
-; GCN-NEXT: v_writelane_b32 v0, s16, 13
-; GCN-NEXT: v_writelane_b32 v0, s17, 14
-; GCN-NEXT: v_writelane_b32 v0, s18, 15
-; GCN-NEXT: v_writelane_b32 v0, s19, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:15]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[16:31]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s0, 17
-; GCN-NEXT: v_writelane_b32 v0, s1, 18
-; GCN-NEXT: v_writelane_b32 v0, s2, 19
-; GCN-NEXT: v_writelane_b32 v0, s3, 20
-; GCN-NEXT: v_writelane_b32 v0, s4, 21
-; GCN-NEXT: v_writelane_b32 v0, s5, 22
-; GCN-NEXT: v_writelane_b32 v0, s6, 23
-; GCN-NEXT: v_writelane_b32 v0, s7, 24
-; GCN-NEXT: v_writelane_b32 v0, s8, 25
-; GCN-NEXT: v_writelane_b32 v0, s9, 26
-; GCN-NEXT: v_writelane_b32 v0, s10, 27
-; GCN-NEXT: v_writelane_b32 v0, s11, 28
-; GCN-NEXT: v_writelane_b32 v0, s12, 29
-; GCN-NEXT: v_writelane_b32 v0, s13, 30
-; GCN-NEXT: v_writelane_b32 v0, s14, 31
-; GCN-NEXT: v_writelane_b32 v0, s15, 32
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[8:9]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: v_readlane_b32 s11, v0, 0
-; GCN-NEXT: s_cmp_lg_u32 s11, s10
-; GCN-NEXT: v_writelane_b32 v0, s36, 33
-; GCN-NEXT: v_writelane_b32 v0, s37, 34
-; GCN-NEXT: v_writelane_b32 v0, s38, 35
-; GCN-NEXT: v_writelane_b32 v0, s39, 36
-; GCN-NEXT: v_writelane_b32 v0, s40, 37
-; GCN-NEXT: v_writelane_b32 v0, s41, 38
-; GCN-NEXT: v_writelane_b32 v0, s42, 39
-; GCN-NEXT: v_writelane_b32 v0, s43, 40
-; GCN-NEXT: v_writelane_b32 v0, s44, 41
-; GCN-NEXT: v_writelane_b32 v0, s45, 42
-; GCN-NEXT: v_writelane_b32 v0, s46, 43
-; GCN-NEXT: v_writelane_b32 v0, s47, 44
-; GCN-NEXT: v_writelane_b32 v0, s48, 45
-; GCN-NEXT: v_writelane_b32 v0, s49, 46
-; GCN-NEXT: v_writelane_b32 v0, s50, 47
-; GCN-NEXT: v_writelane_b32 v0, s51, 48
-; GCN-NEXT: v_writelane_b32 v0, s16, 49
-; GCN-NEXT: v_writelane_b32 v0, s17, 50
-; GCN-NEXT: v_writelane_b32 v0, s18, 51
-; GCN-NEXT: v_writelane_b32 v0, s19, 52
-; GCN-NEXT: v_writelane_b32 v0, s20, 53
-; GCN-NEXT: v_writelane_b32 v0, s21, 54
-; GCN-NEXT: v_writelane_b32 v0, s22, 55
-; GCN-NEXT: v_writelane_b32 v0, s23, 56
-; GCN-NEXT: v_writelane_b32 v0, s24, 57
-; GCN-NEXT: v_writelane_b32 v0, s25, 58
-; GCN-NEXT: v_writelane_b32 v0, s26, 59
-; GCN-NEXT: v_writelane_b32 v0, s27, 60
-; GCN-NEXT: v_writelane_b32 v0, s28, 61
-; GCN-NEXT: v_writelane_b32 v0, s29, 62
-; GCN-NEXT: v_writelane_b32 v0, s30, 63
-; GCN-NEXT: v_writelane_b32 v1, s31, 0
-; GCN-NEXT: v_writelane_b32 v1, s0, 1
-; GCN-NEXT: v_writelane_b32 v1, s1, 2
-; GCN-NEXT: v_writelane_b32 v1, s2, 3
-; GCN-NEXT: v_writelane_b32 v1, s3, 4
-; GCN-NEXT: v_writelane_b32 v1, s4, 5
-; GCN-NEXT: v_writelane_b32 v1, s5, 6
-; GCN-NEXT: v_writelane_b32 v1, s6, 7
-; GCN-NEXT: v_writelane_b32 v1, s7, 8
-; GCN-NEXT: v_writelane_b32 v1, s8, 9
-; GCN-NEXT: v_writelane_b32 v1, s9, 10
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v0, s36, 32
+; GCN-NEXT: v_writelane_b32 v0, s37, 33
+; GCN-NEXT: v_writelane_b32 v0, s38, 34
+; GCN-NEXT: v_writelane_b32 v0, s39, 35
+; GCN-NEXT: v_writelane_b32 v0, s40, 36
+; GCN-NEXT: v_writelane_b32 v0, s41, 37
+; GCN-NEXT: v_writelane_b32 v0, s42, 38
+; GCN-NEXT: v_writelane_b32 v0, s43, 39
+; GCN-NEXT: v_writelane_b32 v0, s44, 40
+; GCN-NEXT: v_writelane_b32 v0, s45, 41
+; GCN-NEXT: v_writelane_b32 v0, s46, 42
+; GCN-NEXT: v_writelane_b32 v0, s47, 43
+; GCN-NEXT: v_writelane_b32 v0, s48, 44
+; GCN-NEXT: v_writelane_b32 v0, s49, 45
+; GCN-NEXT: v_writelane_b32 v0, s50, 46
+; GCN-NEXT: v_writelane_b32 v0, s51, 47
+; GCN-NEXT: v_writelane_b32 v0, s4, 48
+; GCN-NEXT: v_writelane_b32 v0, s5, 49
+; GCN-NEXT: v_writelane_b32 v0, s6, 50
+; GCN-NEXT: v_writelane_b32 v0, s7, 51
+; GCN-NEXT: v_writelane_b32 v0, s8, 52
+; GCN-NEXT: v_writelane_b32 v0, s9, 53
+; GCN-NEXT: v_writelane_b32 v0, s10, 54
+; GCN-NEXT: v_writelane_b32 v0, s11, 55
+; GCN-NEXT: v_writelane_b32 v0, s12, 56
+; GCN-NEXT: v_writelane_b32 v0, s13, 57
+; GCN-NEXT: v_writelane_b32 v0, s14, 58
+; GCN-NEXT: v_writelane_b32 v0, s15, 59
+; GCN-NEXT: v_writelane_b32 v0, s16, 60
+; GCN-NEXT: v_writelane_b32 v0, s17, 61
+; GCN-NEXT: v_writelane_b32 v0, s18, 62
+; GCN-NEXT: v_writelane_b32 v0, s19, 63
+; GCN-NEXT: v_writelane_b32 v1, s20, 0
+; GCN-NEXT: v_writelane_b32 v1, s21, 1
+; GCN-NEXT: v_writelane_b32 v1, s22, 2
+; GCN-NEXT: v_writelane_b32 v1, s23, 3
+; GCN-NEXT: v_writelane_b32 v1, s24, 4
+; GCN-NEXT: v_writelane_b32 v1, s25, 5
+; GCN-NEXT: v_writelane_b32 v1, s26, 6
+; GCN-NEXT: v_writelane_b32 v1, s27, 7
+; GCN-NEXT: v_writelane_b32 v1, s0, 8
+; GCN-NEXT: v_writelane_b32 v1, s1, 9
; GCN-NEXT: s_cbranch_scc1 BB1_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v0, 1
-; GCN-NEXT: v_readlane_b32 s1, v0, 2
-; GCN-NEXT: v_readlane_b32 s2, v0, 3
-; GCN-NEXT: v_readlane_b32 s3, v0, 4
-; GCN-NEXT: v_readlane_b32 s4, v0, 5
-; GCN-NEXT: v_readlane_b32 s5, v0, 6
-; GCN-NEXT: v_readlane_b32 s6, v0, 7
-; GCN-NEXT: v_readlane_b32 s7, v0, 8
-; GCN-NEXT: v_readlane_b32 s8, v0, 9
-; GCN-NEXT: v_readlane_b32 s9, v0, 10
-; GCN-NEXT: v_readlane_b32 s10, v0, 11
-; GCN-NEXT: v_readlane_b32 s11, v0, 12
-; GCN-NEXT: v_readlane_b32 s12, v0, 13
-; GCN-NEXT: v_readlane_b32 s13, v0, 14
-; GCN-NEXT: v_readlane_b32 s14, v0, 15
-; GCN-NEXT: v_readlane_b32 s15, v0, 16
+; GCN-NEXT: v_readlane_b32 s0, v0, 0
+; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s2, v0, 2
+; GCN-NEXT: v_readlane_b32 s3, v0, 3
+; GCN-NEXT: v_readlane_b32 s4, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v0, 5
+; GCN-NEXT: v_readlane_b32 s6, v0, 6
+; GCN-NEXT: v_readlane_b32 s7, v0, 7
+; GCN-NEXT: v_readlane_b32 s8, v0, 8
+; GCN-NEXT: v_readlane_b32 s9, v0, 9
+; GCN-NEXT: v_readlane_b32 s10, v0, 10
+; GCN-NEXT: v_readlane_b32 s11, v0, 11
+; GCN-NEXT: v_readlane_b32 s12, v0, 12
+; GCN-NEXT: v_readlane_b32 s13, v0, 13
+; GCN-NEXT: v_readlane_b32 s14, v0, 14
+; GCN-NEXT: v_readlane_b32 s15, v0, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 33
-; GCN-NEXT: v_readlane_b32 s1, v0, 34
-; GCN-NEXT: v_readlane_b32 s2, v0, 35
-; GCN-NEXT: v_readlane_b32 s3, v0, 36
-; GCN-NEXT: v_readlane_b32 s4, v0, 37
-; GCN-NEXT: v_readlane_b32 s5, v0, 38
-; GCN-NEXT: v_readlane_b32 s6, v0, 39
-; GCN-NEXT: v_readlane_b32 s7, v0, 40
-; GCN-NEXT: v_readlane_b32 s8, v0, 41
-; GCN-NEXT: v_readlane_b32 s9, v0, 42
-; GCN-NEXT: v_readlane_b32 s10, v0, 43
-; GCN-NEXT: v_readlane_b32 s11, v0, 44
-; GCN-NEXT: v_readlane_b32 s12, v0, 45
-; GCN-NEXT: v_readlane_b32 s13, v0, 46
-; GCN-NEXT: v_readlane_b32 s14, v0, 47
-; GCN-NEXT: v_readlane_b32 s15, v0, 48
+; GCN-NEXT: v_readlane_b32 s0, v0, 32
+; GCN-NEXT: v_readlane_b32 s1, v0, 33
+; GCN-NEXT: v_readlane_b32 s2, v0, 34
+; GCN-NEXT: v_readlane_b32 s3, v0, 35
+; GCN-NEXT: v_readlane_b32 s4, v0, 36
+; GCN-NEXT: v_readlane_b32 s5, v0, 37
+; GCN-NEXT: v_readlane_b32 s6, v0, 38
+; GCN-NEXT: v_readlane_b32 s7, v0, 39
+; GCN-NEXT: v_readlane_b32 s8, v0, 40
+; GCN-NEXT: v_readlane_b32 s9, v0, 41
+; GCN-NEXT: v_readlane_b32 s10, v0, 42
+; GCN-NEXT: v_readlane_b32 s11, v0, 43
+; GCN-NEXT: v_readlane_b32 s12, v0, 44
+; GCN-NEXT: v_readlane_b32 s13, v0, 45
+; GCN-NEXT: v_readlane_b32 s14, v0, 46
+; GCN-NEXT: v_readlane_b32 s15, v0, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 17
-; GCN-NEXT: v_readlane_b32 s1, v0, 18
-; GCN-NEXT: v_readlane_b32 s2, v0, 19
-; GCN-NEXT: v_readlane_b32 s3, v0, 20
-; GCN-NEXT: v_readlane_b32 s4, v0, 21
-; GCN-NEXT: v_readlane_b32 s5, v0, 22
-; GCN-NEXT: v_readlane_b32 s6, v0, 23
-; GCN-NEXT: v_readlane_b32 s7, v0, 24
-; GCN-NEXT: v_readlane_b32 s8, v0, 25
-; GCN-NEXT: v_readlane_b32 s9, v0, 26
-; GCN-NEXT: v_readlane_b32 s10, v0, 27
-; GCN-NEXT: v_readlane_b32 s11, v0, 28
-; GCN-NEXT: v_readlane_b32 s12, v0, 29
-; GCN-NEXT: v_readlane_b32 s13, v0, 30
-; GCN-NEXT: v_readlane_b32 s14, v0, 31
-; GCN-NEXT: v_readlane_b32 s15, v0, 32
+; GCN-NEXT: v_readlane_b32 s0, v0, 16
+; GCN-NEXT: v_readlane_b32 s1, v0, 17
+; GCN-NEXT: v_readlane_b32 s2, v0, 18
+; GCN-NEXT: v_readlane_b32 s3, v0, 19
+; GCN-NEXT: v_readlane_b32 s4, v0, 20
+; GCN-NEXT: v_readlane_b32 s5, v0, 21
+; GCN-NEXT: v_readlane_b32 s6, v0, 22
+; GCN-NEXT: v_readlane_b32 s7, v0, 23
+; GCN-NEXT: v_readlane_b32 s8, v0, 24
+; GCN-NEXT: v_readlane_b32 s9, v0, 25
+; GCN-NEXT: v_readlane_b32 s10, v0, 26
+; GCN-NEXT: v_readlane_b32 s11, v0, 27
+; GCN-NEXT: v_readlane_b32 s12, v0, 28
+; GCN-NEXT: v_readlane_b32 s13, v0, 29
+; GCN-NEXT: v_readlane_b32 s14, v0, 30
+; GCN-NEXT: v_readlane_b32 s15, v0, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 1
-; GCN-NEXT: v_readlane_b32 s1, v1, 2
-; GCN-NEXT: v_readlane_b32 s2, v1, 3
-; GCN-NEXT: v_readlane_b32 s3, v1, 4
-; GCN-NEXT: v_readlane_b32 s4, v1, 5
-; GCN-NEXT: v_readlane_b32 s5, v1, 6
-; GCN-NEXT: v_readlane_b32 s6, v1, 7
-; GCN-NEXT: v_readlane_b32 s7, v1, 8
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:7]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v1, 9
-; GCN-NEXT: v_readlane_b32 s1, v1, 10
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:1]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 49
-; GCN-NEXT: v_readlane_b32 s1, v0, 50
-; GCN-NEXT: v_readlane_b32 s2, v0, 51
-; GCN-NEXT: v_readlane_b32 s3, v0, 52
-; GCN-NEXT: v_readlane_b32 s4, v0, 53
-; GCN-NEXT: v_readlane_b32 s5, v0, 54
-; GCN-NEXT: v_readlane_b32 s6, v0, 55
-; GCN-NEXT: v_readlane_b32 s7, v0, 56
-; GCN-NEXT: v_readlane_b32 s8, v0, 57
-; GCN-NEXT: v_readlane_b32 s9, v0, 58
-; GCN-NEXT: v_readlane_b32 s10, v0, 59
-; GCN-NEXT: v_readlane_b32 s11, v0, 60
-; GCN-NEXT: v_readlane_b32 s12, v0, 61
-; GCN-NEXT: v_readlane_b32 s13, v0, 62
-; GCN-NEXT: v_readlane_b32 s14, v0, 63
-; GCN-NEXT: v_readlane_b32 s15, v1, 0
+; GCN-NEXT: v_readlane_b32 s16, v1, 0
+; GCN-NEXT: v_readlane_b32 s17, v1, 1
+; GCN-NEXT: v_readlane_b32 s18, v1, 2
+; GCN-NEXT: v_readlane_b32 s19, v1, 3
+; GCN-NEXT: v_readlane_b32 s20, v1, 4
+; GCN-NEXT: v_readlane_b32 s21, v1, 5
+; GCN-NEXT: v_readlane_b32 s22, v1, 6
+; GCN-NEXT: v_readlane_b32 s23, v1, 7
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[16:23]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s24, v1, 8
+; GCN-NEXT: v_readlane_b32 s25, v1, 9
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[24:25]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 48
+; GCN-NEXT: v_readlane_b32 s1, v0, 49
+; GCN-NEXT: v_readlane_b32 s2, v0, 50
+; GCN-NEXT: v_readlane_b32 s3, v0, 51
+; GCN-NEXT: v_readlane_b32 s4, v0, 52
+; GCN-NEXT: v_readlane_b32 s5, v0, 53
+; GCN-NEXT: v_readlane_b32 s6, v0, 54
+; GCN-NEXT: v_readlane_b32 s7, v0, 55
+; GCN-NEXT: v_readlane_b32 s8, v0, 56
+; GCN-NEXT: v_readlane_b32 s9, v0, 57
+; GCN-NEXT: v_readlane_b32 s10, v0, 58
+; GCN-NEXT: v_readlane_b32 s11, v0, 59
+; GCN-NEXT: v_readlane_b32 s12, v0, 60
+; GCN-NEXT: v_readlane_b32 s13, v0, 61
+; GCN-NEXT: v_readlane_b32 s14, v0, 62
+; GCN-NEXT: v_readlane_b32 s15, v0, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
; GCN-LABEL: no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s58, -1
-; GCN-NEXT: s_mov_b32 s59, 0xe8f000
-; GCN-NEXT: s_add_u32 s56, s56, s3
-; GCN-NEXT: s_addc_u32 s57, s57, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s22, -1
+; GCN-NEXT: s_mov_b32 s23, 0xe8f000
+; GCN-NEXT: s_add_u32 s20, s20, s3
+; GCN-NEXT: s_addc_u32 s21, s21, 0
+; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[36:51]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 0
+; GCN-NEXT: v_writelane_b32 v31, s5, 1
+; GCN-NEXT: v_writelane_b32 v31, s6, 2
+; GCN-NEXT: v_writelane_b32 v31, s7, 3
+; GCN-NEXT: v_writelane_b32 v31, s8, 4
+; GCN-NEXT: v_writelane_b32 v31, s9, 5
+; GCN-NEXT: v_writelane_b32 v31, s10, 6
+; GCN-NEXT: v_writelane_b32 v31, s11, 7
+; GCN-NEXT: v_writelane_b32 v31, s12, 8
+; GCN-NEXT: v_writelane_b32 v31, s13, 9
+; GCN-NEXT: v_writelane_b32 v31, s14, 10
+; GCN-NEXT: v_writelane_b32 v31, s15, 11
+; GCN-NEXT: v_writelane_b32 v31, s16, 12
+; GCN-NEXT: v_writelane_b32 v31, s17, 13
+; GCN-NEXT: v_writelane_b32 v31, s18, 14
+; GCN-NEXT: v_writelane_b32 v31, s19, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 16
+; GCN-NEXT: v_writelane_b32 v31, s5, 17
+; GCN-NEXT: v_writelane_b32 v31, s6, 18
+; GCN-NEXT: v_writelane_b32 v31, s7, 19
+; GCN-NEXT: v_writelane_b32 v31, s8, 20
+; GCN-NEXT: v_writelane_b32 v31, s9, 21
+; GCN-NEXT: v_writelane_b32 v31, s10, 22
+; GCN-NEXT: v_writelane_b32 v31, s11, 23
+; GCN-NEXT: v_writelane_b32 v31, s12, 24
+; GCN-NEXT: v_writelane_b32 v31, s13, 25
+; GCN-NEXT: v_writelane_b32 v31, s14, 26
+; GCN-NEXT: v_writelane_b32 v31, s15, 27
+; GCN-NEXT: v_writelane_b32 v31, s16, 28
+; GCN-NEXT: v_writelane_b32 v31, s17, 29
+; GCN-NEXT: v_writelane_b32 v31, s18, 30
+; GCN-NEXT: v_writelane_b32 v31, s19, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[0:1]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_writelane_b32 v31, s0, 0
-; GCN-NEXT: v_writelane_b32 v31, s4, 1
-; GCN-NEXT: v_writelane_b32 v31, s5, 2
-; GCN-NEXT: v_writelane_b32 v31, s6, 3
-; GCN-NEXT: v_writelane_b32 v31, s7, 4
-; GCN-NEXT: v_writelane_b32 v31, s8, 5
-; GCN-NEXT: v_writelane_b32 v31, s9, 6
-; GCN-NEXT: v_writelane_b32 v31, s10, 7
-; GCN-NEXT: v_writelane_b32 v31, s11, 8
-; GCN-NEXT: v_writelane_b32 v31, s12, 9
-; GCN-NEXT: v_writelane_b32 v31, s13, 10
-; GCN-NEXT: v_writelane_b32 v31, s14, 11
-; GCN-NEXT: v_writelane_b32 v31, s15, 12
-; GCN-NEXT: v_writelane_b32 v31, s16, 13
-; GCN-NEXT: v_writelane_b32 v31, s17, 14
-; GCN-NEXT: v_writelane_b32 v31, s18, 15
-; GCN-NEXT: v_writelane_b32 v31, s19, 16
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[0:15]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[16:31]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; def s[34:35]
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s33, 0
-; GCN-NEXT: v_readlane_b32 s52, v31, 0
-; GCN-NEXT: s_cmp_lg_u32 s52, s33
-; GCN-NEXT: v_writelane_b32 v31, s36, 17
-; GCN-NEXT: v_writelane_b32 v31, s37, 18
-; GCN-NEXT: v_writelane_b32 v31, s38, 19
-; GCN-NEXT: v_writelane_b32 v31, s39, 20
-; GCN-NEXT: v_writelane_b32 v31, s40, 21
-; GCN-NEXT: v_writelane_b32 v31, s41, 22
-; GCN-NEXT: v_writelane_b32 v31, s42, 23
-; GCN-NEXT: v_writelane_b32 v31, s43, 24
-; GCN-NEXT: v_writelane_b32 v31, s44, 25
-; GCN-NEXT: v_writelane_b32 v31, s45, 26
-; GCN-NEXT: v_writelane_b32 v31, s46, 27
-; GCN-NEXT: v_writelane_b32 v31, s47, 28
-; GCN-NEXT: v_writelane_b32 v31, s48, 29
-; GCN-NEXT: v_writelane_b32 v31, s49, 30
-; GCN-NEXT: v_writelane_b32 v31, s50, 31
-; GCN-NEXT: v_writelane_b32 v31, s51, 32
-; GCN-NEXT: v_writelane_b32 v31, s0, 33
-; GCN-NEXT: v_writelane_b32 v31, s1, 34
-; GCN-NEXT: v_writelane_b32 v31, s2, 35
-; GCN-NEXT: v_writelane_b32 v31, s3, 36
-; GCN-NEXT: v_writelane_b32 v31, s4, 37
-; GCN-NEXT: v_writelane_b32 v31, s5, 38
-; GCN-NEXT: v_writelane_b32 v31, s6, 39
-; GCN-NEXT: v_writelane_b32 v31, s7, 40
-; GCN-NEXT: v_writelane_b32 v31, s8, 41
-; GCN-NEXT: v_writelane_b32 v31, s9, 42
-; GCN-NEXT: v_writelane_b32 v31, s10, 43
-; GCN-NEXT: v_writelane_b32 v31, s11, 44
-; GCN-NEXT: v_writelane_b32 v31, s12, 45
-; GCN-NEXT: v_writelane_b32 v31, s13, 46
-; GCN-NEXT: v_writelane_b32 v31, s14, 47
-; GCN-NEXT: v_writelane_b32 v31, s15, 48
-; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0
-; GCN-NEXT: v_writelane_b32 v0, s16, 0
-; GCN-NEXT: v_writelane_b32 v0, s17, 1
-; GCN-NEXT: v_writelane_b32 v0, s18, 2
-; GCN-NEXT: v_writelane_b32 v0, s19, 3
-; GCN-NEXT: v_writelane_b32 v0, s20, 4
-; GCN-NEXT: v_writelane_b32 v0, s21, 5
-; GCN-NEXT: v_writelane_b32 v0, s22, 6
-; GCN-NEXT: v_writelane_b32 v0, s23, 7
-; GCN-NEXT: v_writelane_b32 v0, s24, 8
-; GCN-NEXT: v_writelane_b32 v0, s25, 9
-; GCN-NEXT: v_writelane_b32 v0, s26, 10
-; GCN-NEXT: v_writelane_b32 v0, s27, 11
-; GCN-NEXT: v_writelane_b32 v0, s28, 12
-; GCN-NEXT: v_writelane_b32 v0, s29, 13
-; GCN-NEXT: v_writelane_b32 v0, s30, 14
-; GCN-NEXT: v_writelane_b32 v0, s31, 15
-; GCN-NEXT: s_mov_b64 s[16:17], exec
-; GCN-NEXT: s_mov_b64 exec, 0xffff
-; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v31, s34, 49
-; GCN-NEXT: v_writelane_b32 v31, s35, 50
-; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0
+; GCN-NEXT: s_cmp_lg_u32 s2, s3
+; GCN-NEXT: v_writelane_b32 v31, s36, 32
+; GCN-NEXT: v_writelane_b32 v31, s37, 33
+; GCN-NEXT: v_writelane_b32 v31, s38, 34
+; GCN-NEXT: v_writelane_b32 v31, s39, 35
+; GCN-NEXT: v_writelane_b32 v31, s40, 36
+; GCN-NEXT: v_writelane_b32 v31, s41, 37
+; GCN-NEXT: v_writelane_b32 v31, s42, 38
+; GCN-NEXT: v_writelane_b32 v31, s43, 39
+; GCN-NEXT: v_writelane_b32 v31, s44, 40
+; GCN-NEXT: v_writelane_b32 v31, s45, 41
+; GCN-NEXT: v_writelane_b32 v31, s46, 42
+; GCN-NEXT: v_writelane_b32 v31, s47, 43
+; GCN-NEXT: v_writelane_b32 v31, s48, 44
+; GCN-NEXT: v_writelane_b32 v31, s49, 45
+; GCN-NEXT: v_writelane_b32 v31, s50, 46
+; GCN-NEXT: v_writelane_b32 v31, s51, 47
+; GCN-NEXT: v_writelane_b32 v31, s4, 48
+; GCN-NEXT: v_writelane_b32 v31, s5, 49
+; GCN-NEXT: v_writelane_b32 v31, s6, 50
+; GCN-NEXT: v_writelane_b32 v31, s7, 51
+; GCN-NEXT: v_writelane_b32 v31, s8, 52
+; GCN-NEXT: v_writelane_b32 v31, s9, 53
+; GCN-NEXT: v_writelane_b32 v31, s10, 54
+; GCN-NEXT: v_writelane_b32 v31, s11, 55
+; GCN-NEXT: v_writelane_b32 v31, s12, 56
+; GCN-NEXT: v_writelane_b32 v31, s13, 57
+; GCN-NEXT: v_writelane_b32 v31, s14, 58
+; GCN-NEXT: v_writelane_b32 v31, s15, 59
+; GCN-NEXT: v_writelane_b32 v31, s16, 60
+; GCN-NEXT: v_writelane_b32 v31, s17, 61
+; GCN-NEXT: v_writelane_b32 v31, s18, 62
+; GCN-NEXT: v_writelane_b32 v31, s19, 63
+; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0
+; GCN-NEXT: v_writelane_b32 v0, s0, 0
+; GCN-NEXT: v_writelane_b32 v0, s1, 1
+; GCN-NEXT: s_mov_b64 s[0:1], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[0:1]
+; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0
; GCN-NEXT: s_cbranch_scc1 BB2_2
; GCN-NEXT: ; %bb.1: ; %bb0
-; GCN-NEXT: v_readlane_b32 s0, v31, 1
-; GCN-NEXT: v_readlane_b32 s1, v31, 2
-; GCN-NEXT: v_readlane_b32 s2, v31, 3
-; GCN-NEXT: v_readlane_b32 s3, v31, 4
-; GCN-NEXT: v_readlane_b32 s4, v31, 5
-; GCN-NEXT: v_readlane_b32 s5, v31, 6
-; GCN-NEXT: v_readlane_b32 s6, v31, 7
-; GCN-NEXT: v_readlane_b32 s7, v31, 8
-; GCN-NEXT: v_readlane_b32 s8, v31, 9
-; GCN-NEXT: v_readlane_b32 s9, v31, 10
-; GCN-NEXT: v_readlane_b32 s10, v31, 11
-; GCN-NEXT: v_readlane_b32 s11, v31, 12
-; GCN-NEXT: v_readlane_b32 s12, v31, 13
-; GCN-NEXT: v_readlane_b32 s13, v31, 14
-; GCN-NEXT: v_readlane_b32 s14, v31, 15
-; GCN-NEXT: v_readlane_b32 s15, v31, 16
+; GCN-NEXT: v_readlane_b32 s0, v31, 0
+; GCN-NEXT: v_readlane_b32 s1, v31, 1
+; GCN-NEXT: v_readlane_b32 s2, v31, 2
+; GCN-NEXT: v_readlane_b32 s3, v31, 3
+; GCN-NEXT: v_readlane_b32 s4, v31, 4
+; GCN-NEXT: v_readlane_b32 s5, v31, 5
+; GCN-NEXT: v_readlane_b32 s6, v31, 6
+; GCN-NEXT: v_readlane_b32 s7, v31, 7
+; GCN-NEXT: v_readlane_b32 s8, v31, 8
+; GCN-NEXT: v_readlane_b32 s9, v31, 9
+; GCN-NEXT: v_readlane_b32 s10, v31, 10
+; GCN-NEXT: v_readlane_b32 s11, v31, 11
+; GCN-NEXT: v_readlane_b32 s12, v31, 12
+; GCN-NEXT: v_readlane_b32 s13, v31, 13
+; GCN-NEXT: v_readlane_b32 s14, v31, 14
+; GCN-NEXT: v_readlane_b32 s15, v31, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 17
-; GCN-NEXT: v_readlane_b32 s1, v31, 18
-; GCN-NEXT: v_readlane_b32 s2, v31, 19
-; GCN-NEXT: v_readlane_b32 s3, v31, 20
-; GCN-NEXT: v_readlane_b32 s4, v31, 21
-; GCN-NEXT: v_readlane_b32 s5, v31, 22
-; GCN-NEXT: v_readlane_b32 s6, v31, 23
-; GCN-NEXT: v_readlane_b32 s7, v31, 24
-; GCN-NEXT: v_readlane_b32 s8, v31, 25
-; GCN-NEXT: v_readlane_b32 s9, v31, 26
-; GCN-NEXT: v_readlane_b32 s10, v31, 27
-; GCN-NEXT: v_readlane_b32 s11, v31, 28
-; GCN-NEXT: v_readlane_b32 s12, v31, 29
-; GCN-NEXT: v_readlane_b32 s13, v31, 30
-; GCN-NEXT: v_readlane_b32 s14, v31, 31
-; GCN-NEXT: v_readlane_b32 s15, v31, 32
+; GCN-NEXT: v_readlane_b32 s0, v31, 32
+; GCN-NEXT: v_readlane_b32 s1, v31, 33
+; GCN-NEXT: v_readlane_b32 s2, v31, 34
+; GCN-NEXT: v_readlane_b32 s3, v31, 35
+; GCN-NEXT: v_readlane_b32 s4, v31, 36
+; GCN-NEXT: v_readlane_b32 s5, v31, 37
+; GCN-NEXT: v_readlane_b32 s6, v31, 38
+; GCN-NEXT: v_readlane_b32 s7, v31, 39
+; GCN-NEXT: v_readlane_b32 s8, v31, 40
+; GCN-NEXT: v_readlane_b32 s9, v31, 41
+; GCN-NEXT: v_readlane_b32 s10, v31, 42
+; GCN-NEXT: v_readlane_b32 s11, v31, 43
+; GCN-NEXT: v_readlane_b32 s12, v31, 44
+; GCN-NEXT: v_readlane_b32 s13, v31, 45
+; GCN-NEXT: v_readlane_b32 s14, v31, 46
+; GCN-NEXT: v_readlane_b32 s15, v31, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 33
-; GCN-NEXT: v_readlane_b32 s1, v31, 34
-; GCN-NEXT: v_readlane_b32 s2, v31, 35
-; GCN-NEXT: v_readlane_b32 s3, v31, 36
-; GCN-NEXT: v_readlane_b32 s4, v31, 37
-; GCN-NEXT: v_readlane_b32 s5, v31, 38
-; GCN-NEXT: v_readlane_b32 s6, v31, 39
-; GCN-NEXT: v_readlane_b32 s7, v31, 40
-; GCN-NEXT: v_readlane_b32 s8, v31, 41
-; GCN-NEXT: v_readlane_b32 s9, v31, 42
-; GCN-NEXT: v_readlane_b32 s10, v31, 43
-; GCN-NEXT: v_readlane_b32 s11, v31, 44
-; GCN-NEXT: v_readlane_b32 s12, v31, 45
-; GCN-NEXT: v_readlane_b32 s13, v31, 46
-; GCN-NEXT: v_readlane_b32 s14, v31, 47
-; GCN-NEXT: v_readlane_b32 s15, v31, 48
+; GCN-NEXT: v_readlane_b32 s0, v31, 16
+; GCN-NEXT: v_readlane_b32 s1, v31, 17
+; GCN-NEXT: v_readlane_b32 s2, v31, 18
+; GCN-NEXT: v_readlane_b32 s3, v31, 19
+; GCN-NEXT: v_readlane_b32 s4, v31, 20
+; GCN-NEXT: v_readlane_b32 s5, v31, 21
+; GCN-NEXT: v_readlane_b32 s6, v31, 22
+; GCN-NEXT: v_readlane_b32 s7, v31, 23
+; GCN-NEXT: v_readlane_b32 s8, v31, 24
+; GCN-NEXT: v_readlane_b32 s9, v31, 25
+; GCN-NEXT: v_readlane_b32 s10, v31, 26
+; GCN-NEXT: v_readlane_b32 s11, v31, 27
+; GCN-NEXT: v_readlane_b32 s12, v31, 28
+; GCN-NEXT: v_readlane_b32 s13, v31, 29
+; GCN-NEXT: v_readlane_b32 s14, v31, 30
+; GCN-NEXT: v_readlane_b32 s15, v31, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b64 s[0:1], exec
-; GCN-NEXT: s_mov_b64 exec, 0xffff
-; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[0:1]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
-; GCN-NEXT: v_readlane_b32 s8, v0, 8
-; GCN-NEXT: v_readlane_b32 s9, v0, 9
-; GCN-NEXT: v_readlane_b32 s10, v0, 10
-; GCN-NEXT: v_readlane_b32 s11, v0, 11
-; GCN-NEXT: v_readlane_b32 s12, v0, 12
-; GCN-NEXT: v_readlane_b32 s13, v0, 13
-; GCN-NEXT: v_readlane_b32 s14, v0, 14
-; GCN-NEXT: v_readlane_b32 s15, v0, 15
+; GCN-NEXT: v_readlane_b32 s0, v31, 48
+; GCN-NEXT: v_readlane_b32 s1, v31, 49
+; GCN-NEXT: v_readlane_b32 s2, v31, 50
+; GCN-NEXT: v_readlane_b32 s3, v31, 51
+; GCN-NEXT: v_readlane_b32 s4, v31, 52
+; GCN-NEXT: v_readlane_b32 s5, v31, 53
+; GCN-NEXT: v_readlane_b32 s6, v31, 54
+; GCN-NEXT: v_readlane_b32 s7, v31, 55
+; GCN-NEXT: v_readlane_b32 s8, v31, 56
+; GCN-NEXT: v_readlane_b32 s9, v31, 57
+; GCN-NEXT: v_readlane_b32 s10, v31, 58
+; GCN-NEXT: v_readlane_b32 s11, v31, 59
+; GCN-NEXT: v_readlane_b32 s12, v31, 60
+; GCN-NEXT: v_readlane_b32 s13, v31, 61
+; GCN-NEXT: v_readlane_b32 s14, v31, 62
+; GCN-NEXT: v_readlane_b32 s15, v31, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v31, 49
-; GCN-NEXT: v_readlane_b32 s1, v31, 50
+; GCN-NEXT: s_mov_b64 s[16:17], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_readlane_b32 s16, v0, 0
+; GCN-NEXT: v_readlane_b32 s17, v0, 1
; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ; use s[0:1]
+; GCN-NEXT: ; use s[16:17]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: BB2_2: ; %ret
; GCN-NEXT: s_endpgm
ret void
}
-; Force save and restore of m0 during SMEM spill
-; GCN-LABEL: {{^}}m0_unavailable_spill:
-
-; GCN: ; def m0, 1
-
-; GCN: s_mov_b32 m0, s0
-; GCN: v_interp_mov_f32
-
-; GCN: ; clobber m0
-
-; TOSMEM: s_mov_b32 s2, m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
-; TOSMEM: s_mov_b32 m0, s2
-
-; TOSMEM: s_mov_b64 exec,
-; TOSMEM: s_cbranch_execz
-; TOSMEM: s_branch
-
-; TOSMEM: BB{{[0-9]+_[0-9]+}}:
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
-
-; GCN-NOT: v_readlane_b32 m0
-; GCN-NOT: s_buffer_store_dword m0
-; GCN-NOT: s_buffer_load_dword m0
-define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 {
-main_body:
- %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0
- %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg)
- call void asm sideeffect "; clobber $0", "~{m0}"() #0
- %cmp = fcmp ueq float 0.000000e+00, %tmp
- br i1 %cmp, label %if, label %else
-
-if: ; preds = %main_body
- store volatile i32 8, i32 addrspace(1)* undef
- br label %endif
-
-else: ; preds = %main_body
- store volatile i32 11, i32 addrspace(1)* undef
- br label %endif
-
-endif:
- ret void
-}
-
-; GCN-LABEL: {{^}}restore_m0_lds:
-; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
-; TOSMEM: s_cmp_eq_u32
-; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x200
-; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_cbranch_scc1
-
-; TOSMEM: s_mov_b32 m0, -1
-
-; TOSMEM: s_mov_b32 s2, m0
-; TOSMEM: s_add_u32 m0, s3, 0x200
-; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
-; TOSMEM: s_mov_b32 m0, s2
-; TOSMEM: s_waitcnt lgkmcnt(0)
-
-; TOSMEM: ds_write_b64
-
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload
-; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_waitcnt lgkmcnt(0)
-; TOSMEM-NOT: m0
-; TOSMEM: s_mov_b32 m0, s2
-; TOSMEM: ; use m0
-
-; TOSMEM: s_dcache_wb
-; TOSMEM: s_endpgm
-define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
- %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
- %sval = load volatile i64, i64 addrspace(4)* undef
- %cmp = icmp eq i32 %arg, 0
- br i1 %cmp, label %ret, label %bb
-
-bb:
- store volatile i64 %sval, i64 addrspace(3)* undef
- call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0
- br label %ret
-
-ret:
- ret void
-}
-
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
; GFX9-LABEL: {{^}}call:
define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
-; GFX9-O0: v_mov_b32_e32 v0, s0
+; GFX9-O0: v_mov_b32_e32 v0, s2
; GFX9-O3: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3
; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
; GFX9-O0: buffer_store_dword v1
; GFX9: s_swappc_b64
%tmp134 = call i64 @called_i64(i64 %tmp107)
-; GFX9-O0: buffer_load_dword v4
-; GFX9-O0: buffer_load_dword v5
+; GFX9-O0: buffer_load_dword v6
+; GFX9-O0: buffer_load_dword v7
%tmp136 = add i64 %tmp134, %tmp107
%tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136)
%tmp138 = bitcast i64 %tmp137 to <2 x i32>
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: @ implicit-def: $q9
; CHECK-NEXT: vmov.f64 d18, d16
-; CHECK-NEXT: vrev32.16 q8, q9
-; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8
-; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vrev32.16 q9, q9
+; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9
+; CHECK-NEXT: vmov.u16 r0, d18[0]
; CHECK-NEXT: bx lr
%vec = bitcast i80 %in to <5 x i16>
%e0 = extractelement <5 x i16> %vec, i32 0
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
; FP32-NEXT: mfc1 $1, $f0
; FP32-NEXT: lui $2, 16864
; FP32-NEXT: ori $3, $zero, 0
-; FP32-NEXT: mtc1 $3, $f0
-; FP32-NEXT: mtc1 $2, $f1
-; FP32-NEXT: sub.d $f2, $f12, $f0
-; FP32-NEXT: trunc.w.d $f2, $f2
-; FP32-NEXT: mfc1 $2, $f2
+; FP32-NEXT: mtc1 $3, $f2
+; FP32-NEXT: mtc1 $2, $f3
+; FP32-NEXT: sub.d $f4, $f12, $f2
+; FP32-NEXT: trunc.w.d $f0, $f4
+; FP32-NEXT: mfc1 $2, $f0
; FP32-NEXT: lui $3, 32768
; FP32-NEXT: xor $2, $2, $3
; FP32-NEXT: addiu $3, $zero, 1
-; FP32-NEXT: c.ult.d $f12, $f0
+; FP32-NEXT: c.ult.d $f12, $f2
; FP32-NEXT: movf $3, $zero, $fcc0
; FP32-NEXT: andi $3, $3, 1
; FP32-NEXT: movn $2, $1, $3
; FP64-NEXT: mfc1 $1, $f0
; FP64-NEXT: lui $2, 16864
; FP64-NEXT: ori $3, $zero, 0
-; FP64-NEXT: mtc1 $3, $f0
-; FP64-NEXT: mthc1 $2, $f0
-; FP64-NEXT: sub.d $f1, $f12, $f0
-; FP64-NEXT: trunc.w.d $f1, $f1
-; FP64-NEXT: mfc1 $2, $f1
+; FP64-NEXT: mtc1 $3, $f1
+; FP64-NEXT: mthc1 $2, $f1
+; FP64-NEXT: sub.d $f2, $f12, $f1
+; FP64-NEXT: trunc.w.d $f0, $f2
+; FP64-NEXT: mfc1 $2, $f0
; FP64-NEXT: lui $3, 32768
; FP64-NEXT: xor $2, $2, $3
; FP64-NEXT: addiu $3, $zero, 1
-; FP64-NEXT: c.ult.d $f12, $f0
+; FP64-NEXT: c.ult.d $f12, $f1
; FP64-NEXT: movf $3, $zero, $fcc0
; FP64-NEXT: andi $3, $3, 1
; FP64-NEXT: movn $2, $1, $3
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB4_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB4_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB4_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB4_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB4_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB4_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB4_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB4_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB4_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB4_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB5_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB5_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB5_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB5_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB5_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB5_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB5_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB5_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB5_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB5_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB6_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB6_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB6_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB6_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB6_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB6_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB6_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB6_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB6_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB6_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 65535
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB7_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB7_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB7_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 65535
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB7_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB7_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB7_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 65535
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB7_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB7_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB7_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 65535
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB7_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB8_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB8_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB8_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB8_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB8_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB8_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB8_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB8_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB8_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB8_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB9_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: slt $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB9_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: slt $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB9_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB9_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: slt $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB9_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: slt $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB9_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB9_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: slt $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB9_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: slt $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB9_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB9_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: slt $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: slt $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB10_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movn $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB10_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movn $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB10_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB10_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: seleqz $8, $7, $10
-; MIPS64R6-NEXT: selnez $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB10_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: seleqz $9, $8, $11
+; MIPS64R6-NEXT: selnez $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB10_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB10_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movn $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB10_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movn $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB10_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB10_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: seleqz $8, $7, $10
-; MIPS64ELR6-NEXT: selnez $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: seleqz $9, $8, $11
+; MIPS64ELR6-NEXT: selnez $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64-NEXT: sll $2, $2, 3
; MIPS64-NEXT: ori $3, $zero, 255
; MIPS64-NEXT: sllv $3, $3, $2
-; MIPS64-NEXT: nor $4, $zero, $3
+; MIPS64-NEXT: nor $6, $zero, $3
; MIPS64-NEXT: sllv $5, $5, $2
; MIPS64-NEXT: .LBB11_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64-NEXT: ll $7, 0($1)
-; MIPS64-NEXT: sltu $10, $7, $5
-; MIPS64-NEXT: move $8, $7
-; MIPS64-NEXT: movz $8, $5, $10
-; MIPS64-NEXT: and $8, $8, $3
-; MIPS64-NEXT: and $9, $7, $4
-; MIPS64-NEXT: or $9, $9, $8
-; MIPS64-NEXT: sc $9, 0($1)
-; MIPS64-NEXT: beqz $9, .LBB11_1
+; MIPS64-NEXT: ll $8, 0($1)
+; MIPS64-NEXT: sltu $11, $8, $5
+; MIPS64-NEXT: move $9, $8
+; MIPS64-NEXT: movz $9, $5, $11
+; MIPS64-NEXT: and $9, $9, $3
+; MIPS64-NEXT: and $10, $8, $6
+; MIPS64-NEXT: or $10, $10, $9
+; MIPS64-NEXT: sc $10, 0($1)
+; MIPS64-NEXT: beqz $10, .LBB11_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
-; MIPS64-NEXT: and $6, $7, $3
-; MIPS64-NEXT: srlv $6, $6, $2
-; MIPS64-NEXT: seh $6, $6
+; MIPS64-NEXT: and $7, $8, $3
+; MIPS64-NEXT: srlv $7, $7, $2
+; MIPS64-NEXT: seh $7, $7
; MIPS64-NEXT: # %bb.3: # %entry
-; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6-NEXT: sll $2, $2, 3
; MIPS64R6-NEXT: ori $3, $zero, 255
; MIPS64R6-NEXT: sllv $3, $3, $2
-; MIPS64R6-NEXT: nor $4, $zero, $3
+; MIPS64R6-NEXT: nor $6, $zero, $3
; MIPS64R6-NEXT: sllv $5, $5, $2
; MIPS64R6-NEXT: .LBB11_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6-NEXT: ll $7, 0($1)
-; MIPS64R6-NEXT: sltu $10, $7, $5
-; MIPS64R6-NEXT: selnez $8, $7, $10
-; MIPS64R6-NEXT: seleqz $10, $5, $10
-; MIPS64R6-NEXT: or $8, $8, $10
-; MIPS64R6-NEXT: and $8, $8, $3
-; MIPS64R6-NEXT: and $9, $7, $4
-; MIPS64R6-NEXT: or $9, $9, $8
-; MIPS64R6-NEXT: sc $9, 0($1)
-; MIPS64R6-NEXT: beqzc $9, .LBB11_1
+; MIPS64R6-NEXT: ll $8, 0($1)
+; MIPS64R6-NEXT: sltu $11, $8, $5
+; MIPS64R6-NEXT: selnez $9, $8, $11
+; MIPS64R6-NEXT: seleqz $11, $5, $11
+; MIPS64R6-NEXT: or $9, $9, $11
+; MIPS64R6-NEXT: and $9, $9, $3
+; MIPS64R6-NEXT: and $10, $8, $6
+; MIPS64R6-NEXT: or $10, $10, $9
+; MIPS64R6-NEXT: sc $10, 0($1)
+; MIPS64R6-NEXT: beqzc $10, .LBB11_1
; MIPS64R6-NEXT: # %bb.2: # %entry
-; MIPS64R6-NEXT: and $6, $7, $3
-; MIPS64R6-NEXT: srlv $6, $6, $2
-; MIPS64R6-NEXT: seh $6, $6
+; MIPS64R6-NEXT: and $7, $8, $3
+; MIPS64R6-NEXT: srlv $7, $7, $2
+; MIPS64R6-NEXT: seh $7, $7
; MIPS64R6-NEXT: # %bb.3: # %entry
-; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64EL-NEXT: sll $2, $2, 3
; MIPS64EL-NEXT: ori $3, $zero, 255
; MIPS64EL-NEXT: sllv $3, $3, $2
-; MIPS64EL-NEXT: nor $4, $zero, $3
+; MIPS64EL-NEXT: nor $6, $zero, $3
; MIPS64EL-NEXT: sllv $5, $5, $2
; MIPS64EL-NEXT: .LBB11_1: # %entry
; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64EL-NEXT: ll $7, 0($1)
-; MIPS64EL-NEXT: and $7, $7, $3
-; MIPS64EL-NEXT: and $5, $5, $3
-; MIPS64EL-NEXT: sltu $10, $7, $5
-; MIPS64EL-NEXT: move $8, $7
-; MIPS64EL-NEXT: movz $8, $5, $10
+; MIPS64EL-NEXT: ll $8, 0($1)
; MIPS64EL-NEXT: and $8, $8, $3
-; MIPS64EL-NEXT: and $9, $7, $4
-; MIPS64EL-NEXT: or $9, $9, $8
-; MIPS64EL-NEXT: sc $9, 0($1)
-; MIPS64EL-NEXT: beqz $9, .LBB11_1
+; MIPS64EL-NEXT: and $5, $5, $3
+; MIPS64EL-NEXT: sltu $11, $8, $5
+; MIPS64EL-NEXT: move $9, $8
+; MIPS64EL-NEXT: movz $9, $5, $11
+; MIPS64EL-NEXT: and $9, $9, $3
+; MIPS64EL-NEXT: and $10, $8, $6
+; MIPS64EL-NEXT: or $10, $10, $9
+; MIPS64EL-NEXT: sc $10, 0($1)
+; MIPS64EL-NEXT: beqz $10, .LBB11_1
; MIPS64EL-NEXT: nop
; MIPS64EL-NEXT: # %bb.2: # %entry
-; MIPS64EL-NEXT: and $6, $7, $3
-; MIPS64EL-NEXT: srlv $6, $6, $2
-; MIPS64EL-NEXT: seh $6, $6
+; MIPS64EL-NEXT: and $7, $8, $3
+; MIPS64EL-NEXT: srlv $7, $7, $2
+; MIPS64EL-NEXT: seh $7, $7
; MIPS64EL-NEXT: # %bb.3: # %entry
-; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64EL-NEXT: # %bb.4: # %entry
; MIPS64EL-NEXT: sync
; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64ELR6-NEXT: sll $2, $2, 3
; MIPS64ELR6-NEXT: ori $3, $zero, 255
; MIPS64ELR6-NEXT: sllv $3, $3, $2
-; MIPS64ELR6-NEXT: nor $4, $zero, $3
+; MIPS64ELR6-NEXT: nor $6, $zero, $3
; MIPS64ELR6-NEXT: sllv $5, $5, $2
; MIPS64ELR6-NEXT: .LBB11_1: # %entry
; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64ELR6-NEXT: ll $7, 0($1)
-; MIPS64ELR6-NEXT: and $7, $7, $3
-; MIPS64ELR6-NEXT: and $5, $5, $3
-; MIPS64ELR6-NEXT: sltu $10, $7, $5
-; MIPS64ELR6-NEXT: selnez $8, $7, $10
-; MIPS64ELR6-NEXT: seleqz $10, $5, $10
-; MIPS64ELR6-NEXT: or $8, $8, $10
+; MIPS64ELR6-NEXT: ll $8, 0($1)
; MIPS64ELR6-NEXT: and $8, $8, $3
-; MIPS64ELR6-NEXT: and $9, $7, $4
-; MIPS64ELR6-NEXT: or $9, $9, $8
-; MIPS64ELR6-NEXT: sc $9, 0($1)
-; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1
+; MIPS64ELR6-NEXT: and $5, $5, $3
+; MIPS64ELR6-NEXT: sltu $11, $8, $5
+; MIPS64ELR6-NEXT: selnez $9, $8, $11
+; MIPS64ELR6-NEXT: seleqz $11, $5, $11
+; MIPS64ELR6-NEXT: or $9, $9, $11
+; MIPS64ELR6-NEXT: and $9, $9, $3
+; MIPS64ELR6-NEXT: and $10, $8, $6
+; MIPS64ELR6-NEXT: or $10, $10, $9
+; MIPS64ELR6-NEXT: sc $10, 0($1)
+; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1
; MIPS64ELR6-NEXT: # %bb.2: # %entry
-; MIPS64ELR6-NEXT: and $6, $7, $3
-; MIPS64ELR6-NEXT: srlv $6, $6, $2
-; MIPS64ELR6-NEXT: seh $6, $6
+; MIPS64ELR6-NEXT: and $7, $8, $3
+; MIPS64ELR6-NEXT: srlv $7, $7, $2
+; MIPS64ELR6-NEXT: seh $7, $7
; MIPS64ELR6-NEXT: # %bb.3: # %entry
-; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64ELR6-NEXT: # %bb.4: # %entry
; MIPS64ELR6-NEXT: sync
; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB8_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: addu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: addu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB9_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: subu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: subu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB10_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: and $8, $7, $4
-; MIPS64R6O0-NEXT: nor $8, $zero, $8
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: and $9, $8, $4
+; MIPS64R6O0-NEXT: nor $9, $zero, $9
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB11_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: and $8, $4, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: and $9, $4, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seb $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 3
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 255
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $6, $zero, $3
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $6, $zero, 255
+; MIPS64R6O0-NEXT: sllv $6, $6, $3
+; MIPS64R6O0-NEXT: nor $7, $zero, $6
; MIPS64R6O0-NEXT: andi $4, $4, 255
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: andi $5, $5, 255
-; MIPS64R6O0-NEXT: sllv $5, $5, $1
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: .LBB12_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $8, 0($2)
-; MIPS64R6O0-NEXT: and $9, $8, $3
-; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3
+; MIPS64R6O0-NEXT: ll $9, 0($2)
+; MIPS64R6O0-NEXT: and $10, $9, $6
+; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
-; MIPS64R6O0-NEXT: and $8, $8, $6
-; MIPS64R6O0-NEXT: or $8, $8, $5
-; MIPS64R6O0-NEXT: sc $8, 0($2)
-; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1
+; MIPS64R6O0-NEXT: and $9, $9, $7
+; MIPS64R6O0-NEXT: or $9, $9, $5
+; MIPS64R6O0-NEXT: sc $9, 0($2)
+; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1
; MIPS64R6O0-NEXT: .LBB12_3: # %entry
-; MIPS64R6O0-NEXT: srlv $7, $9, $1
-; MIPS64R6O0-NEXT: seb $7, $7
+; MIPS64R6O0-NEXT: srlv $8, $10, $3
+; MIPS64R6O0-NEXT: seb $8, $8
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: sll $2, $2, 3
; MIPS64R6O0-NEXT: ori $3, $zero, 255
; MIPS64R6O0-NEXT: sllv $3, $3, $2
-; MIPS64R6O0-NEXT: nor $4, $zero, $3
-; MIPS64R6O0-NEXT: andi $7, $5, 255
-; MIPS64R6O0-NEXT: sllv $7, $7, $2
+; MIPS64R6O0-NEXT: nor $7, $zero, $3
+; MIPS64R6O0-NEXT: andi $8, $5, 255
+; MIPS64R6O0-NEXT: sllv $8, $8, $2
; MIPS64R6O0-NEXT: andi $6, $6, 255
; MIPS64R6O0-NEXT: sllv $6, $6, $2
; MIPS64R6O0-NEXT: .LBB13_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $9, 0($1)
-; MIPS64R6O0-NEXT: and $10, $9, $3
-; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3
+; MIPS64R6O0-NEXT: ll $10, 0($1)
+; MIPS64R6O0-NEXT: and $11, $10, $3
+; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
-; MIPS64R6O0-NEXT: and $9, $9, $4
-; MIPS64R6O0-NEXT: or $9, $9, $6
-; MIPS64R6O0-NEXT: sc $9, 0($1)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1
+; MIPS64R6O0-NEXT: and $10, $10, $7
+; MIPS64R6O0-NEXT: or $10, $10, $6
+; MIPS64R6O0-NEXT: sc $10, 0($1)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1
; MIPS64R6O0-NEXT: .LBB13_3: # %entry
-; MIPS64R6O0-NEXT: srlv $8, $10, $2
-; MIPS64R6O0-NEXT: seb $8, $8
+; MIPS64R6O0-NEXT: srlv $9, $11, $2
+; MIPS64R6O0-NEXT: seb $9, $9
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
-; MIPS64R6O0-NEXT: andi $1, $1, 3
-; MIPS64R6O0-NEXT: xori $1, $1, 2
-; MIPS64R6O0-NEXT: sll $1, $1, 3
-; MIPS64R6O0-NEXT: ori $3, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $3, $3, $1
-; MIPS64R6O0-NEXT: nor $5, $zero, $3
-; MIPS64R6O0-NEXT: sllv $4, $4, $1
+; MIPS64R6O0-NEXT: andi $3, $1, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 2
+; MIPS64R6O0-NEXT: sll $3, $3, 3
+; MIPS64R6O0-NEXT: ori $5, $zero, 65535
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
+; MIPS64R6O0-NEXT: nor $6, $zero, $5
+; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB14_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $7, 0($2)
-; MIPS64R6O0-NEXT: addu $8, $7, $4
-; MIPS64R6O0-NEXT: and $8, $8, $3
-; MIPS64R6O0-NEXT: and $9, $7, $5
-; MIPS64R6O0-NEXT: or $9, $9, $8
-; MIPS64R6O0-NEXT: sc $9, 0($2)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1
+; MIPS64R6O0-NEXT: ll $8, 0($2)
+; MIPS64R6O0-NEXT: addu $9, $8, $4
+; MIPS64R6O0-NEXT: and $9, $9, $5
+; MIPS64R6O0-NEXT: and $10, $8, $6
+; MIPS64R6O0-NEXT: or $10, $10, $9
+; MIPS64R6O0-NEXT: sc $10, 0($2)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: and $6, $7, $3
-; MIPS64R6O0-NEXT: srlv $6, $6, $1
-; MIPS64R6O0-NEXT: seh $6, $6
+; MIPS64R6O0-NEXT: and $7, $8, $5
+; MIPS64R6O0-NEXT: srlv $7, $7, $3
+; MIPS64R6O0-NEXT: seh $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
-; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
; MIPS64R6O0-NEXT: sll $3, $5, 0
; MIPS64R6O0-NEXT: addu $2, $3, $2
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
-; MIPS64R6O0-NEXT: and $3, $4, $3
-; MIPS64R6O0-NEXT: andi $4, $4, 3
-; MIPS64R6O0-NEXT: xori $4, $4, 2
-; MIPS64R6O0-NEXT: sll $4, $4, 3
+; MIPS64R6O0-NEXT: daddiu $8, $zero, -4
+; MIPS64R6O0-NEXT: and $8, $4, $8
+; MIPS64R6O0-NEXT: andi $3, $4, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 2
+; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 65535
-; MIPS64R6O0-NEXT: sllv $5, $5, $4
+; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: andi $7, $2, 65535
-; MIPS64R6O0-NEXT: sllv $7, $7, $4
+; MIPS64R6O0-NEXT: sllv $7, $7, $3
; MIPS64R6O0-NEXT: andi $1, $1, 65535
-; MIPS64R6O0-NEXT: sllv $1, $1, $4
+; MIPS64R6O0-NEXT: sllv $1, $1, $3
; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $9, 0($3)
-; MIPS64R6O0-NEXT: and $10, $9, $5
-; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3
+; MIPS64R6O0-NEXT: ll $10, 0($8)
+; MIPS64R6O0-NEXT: and $11, $10, $5
+; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3
; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
-; MIPS64R6O0-NEXT: and $9, $9, $6
-; MIPS64R6O0-NEXT: or $9, $9, $1
-; MIPS64R6O0-NEXT: sc $9, 0($3)
-; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1
+; MIPS64R6O0-NEXT: and $10, $10, $6
+; MIPS64R6O0-NEXT: or $10, $10, $1
+; MIPS64R6O0-NEXT: sc $10, 0($8)
+; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1
; MIPS64R6O0-NEXT: .LBB15_3:
-; MIPS64R6O0-NEXT: srlv $8, $10, $4
-; MIPS64R6O0-NEXT: seh $8, $8
+; MIPS64R6O0-NEXT: srlv $9, $11, $3
+; MIPS64R6O0-NEXT: seh $9, $9
; MIPS64R6O0-NEXT: # %bb.4:
; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
; MIPS64R6O0-NEXT: sc $6, 0($1)
; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1
; MIPS64R6O0-NEXT: .LBB17_3: # %entry
-; MIPS64R6O0-NEXT: xor $1, $5, $3
-; MIPS64R6O0-NEXT: sltiu $2, $1, 1
+; MIPS64R6O0-NEXT: xor $2, $5, $3
+; MIPS64R6O0-NEXT: sltiu $2, $2, 1
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
;
; CHECK-NEXT: sd $zero, 8($4)
; CHECK-NEXT: daddiu $3, $zero, 30
; CHECK-NEXT: sd $3, 24($4)
-; CHECK-NEXT: addiu $3, $zero, 10
-; CHECK-NEXT: sw $3, 0($4)
+; CHECK-NEXT: addiu $5, $zero, 10
+; CHECK-NEXT: sw $5, 0($4)
; CHECK-NEXT: jr $ra
; CHECK-NEXT: nop
ret { i32, i128, i64 } { i32 10, i128 20, i64 30 }
; CHECK-NEXT: lw $3, 4($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $3
-; CHECK-NEXT: # implicit-def: $v1_64
-; CHECK-NEXT: move $3, $2
-; CHECK-NEXT: # implicit-def: $v0_64
-; CHECK-NEXT: move $2, $1
-; CHECK-NEXT: move $5, $3
-; CHECK-NEXT: move $6, $2
+; CHECK-NEXT: # implicit-def: $a1_64
+; CHECK-NEXT: move $5, $2
+; CHECK-NEXT: # implicit-def: $a2_64
+; CHECK-NEXT: move $6, $1
; CHECK-NEXT: jal use_sret2
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
; CHECK-NEXT: addze 5, 5
; CHECK-NEXT: add 4, 5, 4
; CHECK-NEXT: cmpld 7, 4, 5
-; CHECK-NEXT: mfocrf 4, 1
-; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31
-; CHECK-NEXT: # implicit-def: $x5
-; CHECK-NEXT: mr 5, 4
-; CHECK-NEXT: clrldi 4, 5, 32
+; CHECK-NEXT: mfocrf 10, 1
+; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31
+; CHECK-NEXT: # implicit-def: $x4
+; CHECK-NEXT: mr 4, 10
+; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: std 4, 0(3)
; CHECK-NEXT: blr
%1 = load i64, i64* %a, align 8
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: popcntd 3, 3
-; CHECK-NEXT: xxswapd 0, 34
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0
-; CHECK-NEXT: mffprd 4, 0
+; CHECK-NEXT: xxswapd 1, 34
+; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT: mffprd 4, 1
; CHECK-NEXT: popcntd 4, 4
; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: mtfprd 0, 3
-; CHECK-NEXT: # kill: def $vsl0 killed $f0
+; CHECK-NEXT: fmr 2, 0
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: mtfprd 1, 3
-; CHECK-NEXT: # kill: def $vsl1 killed $f1
-; CHECK-NEXT: xxmrghd 34, 1, 0
+; CHECK-NEXT: mtfprd 0, 3
+; CHECK-NEXT: fmr 3, 0
+; CHECK-NEXT: xxmrghd 34, 3, 2
; CHECK-NEXT: blr
Entry:
%1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
-; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
+; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs1, vs1
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test46:
; CHECK-FISL-NEXT: ld r3, -24(r1)
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
-; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
+; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3
+; CHECK-FISL-NEXT: xxlor v2, vs1, vs1
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test47:
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r3, -20(r1)
-; CHECK-FISL-NEXT: ld r4, -40(r1)
-; CHECK-FISL-NEXT: sld r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -20(r1)
+; CHECK-FISL-NEXT: ld r3, -40(r1)
+; CHECK-FISL-NEXT: sld r3, r3, r4
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r3, -28(r1)
-; CHECK-FISL-NEXT: ld r4, -48(r1)
-; CHECK-FISL-NEXT: sld r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -28(r1)
+; CHECK-FISL-NEXT: ld r3, -48(r1)
+; CHECK-FISL-NEXT: sld r3, r3, r4
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r3, -20(r1)
-; CHECK-FISL-NEXT: ld r4, -40(r1)
-; CHECK-FISL-NEXT: srd r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -20(r1)
+; CHECK-FISL-NEXT: ld r3, -40(r1)
+; CHECK-FISL-NEXT: srd r3, r3, r4
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r3, -28(r1)
-; CHECK-FISL-NEXT: ld r4, -48(r1)
-; CHECK-FISL-NEXT: srd r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -28(r1)
+; CHECK-FISL-NEXT: ld r3, -48(r1)
+; CHECK-FISL-NEXT: srd r3, r3, r4
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
; CHECK-FISL-NEXT: stxvd2x v3, 0, r3
; CHECK-FISL-NEXT: addi r3, r1, -48
; CHECK-FISL-NEXT: stxvd2x v2, 0, r3
-; CHECK-FISL-NEXT: lwz r3, -20(r1)
-; CHECK-FISL-NEXT: ld r4, -40(r1)
-; CHECK-FISL-NEXT: srad r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -20(r1)
+; CHECK-FISL-NEXT: ld r3, -40(r1)
+; CHECK-FISL-NEXT: srad r3, r3, r4
; CHECK-FISL-NEXT: std r3, -8(r1)
-; CHECK-FISL-NEXT: lwz r3, -28(r1)
-; CHECK-FISL-NEXT: ld r4, -48(r1)
-; CHECK-FISL-NEXT: srad r3, r4, r3
+; CHECK-FISL-NEXT: lwz r4, -28(r1)
+; CHECK-FISL-NEXT: ld r3, -48(r1)
+; CHECK-FISL-NEXT: srad r3, r3, r4
; CHECK-FISL-NEXT: std r3, -16(r1)
; CHECK-FISL-NEXT: addi r3, r1, -16
; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3
; CHECK-FISL: # %bb.0:
; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3
; CHECK-FISL-NEXT: stw r3, -16(r1)
-; CHECK-FISL-NEXT: addi r3, r1, -16
-; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3
+; CHECK-FISL-NEXT: addi r4, r1, -16
+; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4
; CHECK-FISL-NEXT: xxspltw v2, vs0, 0
-; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha
-; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l
-; CHECK-FISL-NEXT: lxvw4x v3, 0, r3
+; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha
+; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l
+; CHECK-FISL-NEXT: lxvw4x v3, 0, r4
; CHECK-FISL-NEXT: vadduwm v2, v2, v3
; CHECK-FISL-NEXT: blr
;
; V8-UNOPT-NEXT: std %i4, [%fp+-8]
; V8-UNOPT-NEXT: ldd [%fp+-8], %f0
; V8-UNOPT-NEXT: std %f0, [%fp+-16]
-; V8-UNOPT-NEXT: ldd [%fp+-16], %i0
-; V8-UNOPT-NEXT: mov %i0, %i3
-; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1
-; V8-UNOPT-NEXT: mov %i3, %o0
-; V8-UNOPT-NEXT: mov %i1, %o1
+; V8-UNOPT-NEXT: ldd [%fp+-16], %i4
+; V8-UNOPT-NEXT: mov %i4, %i0
+; V8-UNOPT-NEXT: ! kill: def $i5 killed $i5 killed $i4_i5
+; V8-UNOPT-NEXT: mov %i0, %o0
+; V8-UNOPT-NEXT: mov %i5, %o1
; V8-UNOPT-NEXT: call __truncdfhf2
; V8-UNOPT-NEXT: st %i2, [%fp+-20]
; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload
define i32 @z() nounwind ssp {
; CHECK-LABEL: z:
; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: subl $148, %esp
+; CHECK-NEXT: subl $144, %esp
; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl $8, %ecx
-; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl $8, %edx
+; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: movl %eax, %edi
-; CHECK-NEXT: movl %edx, %esi
+; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: addl $36, %ecx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; CHECK-NEXT: movl %edx, %esi
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
-; CHECK-NEXT: movb %cl, 32(%eax)
-; CHECK-NEXT: movb %cl, 68(%eax)
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
+; CHECK-NEXT: movb %bl, 32(%eax)
+; CHECK-NEXT: movb %bl, 68(%eax)
; CHECK-NEXT: calll _f
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: jne LBB0_3
; CHECK-NEXT: ## %bb.2: ## %SP_return
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; CHECK-NEXT: addl $148, %esp
+; CHECK-NEXT: addl $144, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk
; CHECK-NEXT: calll ___stack_chk_fail
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $-256, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
+; CHECK-O0-NEXT: movl %eax, %ecx
+; CHECK-O0-NEXT: movq %rcx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_and:
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: .cfi_offset %rbx, -16
; CHECK-O0-NEXT: xorl %eax, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-O0-NEXT: movl %eax, %ecx
+; CHECK-O0-NEXT: movq %rcx, %rax
+; CHECK-O0-NEXT: movq %rcx, %rdx
+; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT: lock cmpxchg16b (%rdi)
; CHECK-O0-NEXT: popq %rbx
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: callq __atomic_load
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; CHECK-O0-NEXT: movq %rsi, 24(%rdi)
-; CHECK-O0-NEXT: movq %rdx, 16(%rdi)
-; CHECK-O0-NEXT: movq %rcx, 8(%rdi)
-; CHECK-O0-NEXT: movq %rax, (%rdi)
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; CHECK-O0-NEXT: movq %rdi, 24(%r9)
+; CHECK-O0-NEXT: movq %rsi, 16(%r9)
+; CHECK-O0-NEXT: movq %rdx, 8(%r9)
+; CHECK-O0-NEXT: movq %rax, (%r9)
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: addq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
-; CHECK-O0-NEXT: movl $15, %ecx
-; CHECK-O0-NEXT: divq %rcx
+; CHECK-O0-NEXT: movl $15, %esi
+; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_udiv1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
-; CHECK-O0-NEXT: movl $15, %ecx
-; CHECK-O0-NEXT: divq %rcx
+; CHECK-O0-NEXT: movl $15, %esi
+; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
-; CHECK-O0-NEXT: sete %cl
+; CHECK-O0-NEXT: sete %dl
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movb %cl, %al
+; CHECK-O0-NEXT: movb %dl, %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_icmp3:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $15, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
+; CHECK-O0-NEXT: movl %eax, %ecx
+; CHECK-O0-NEXT: movq %rcx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
-; CHECK-O0-CUR-NEXT: movzbl %al, %eax
-; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-O0-CUR-NEXT: movzbl %al, %ecx
+; CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx
+; CHECK-O0-CUR-NEXT: movw %cx, %ax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movb 1(%rdi), %cl
-; CHECK-O0-NEXT: movzbl %al, %eax
-; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-O0-NEXT: movzbl %cl, %ecx
-; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-O0-NEXT: shlw $8, %cx
-; CHECK-O0-NEXT: orw %cx, %ax
+; CHECK-O0-NEXT: movzbl %al, %edx
+; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx
+; CHECK-O0-NEXT: movzbl %cl, %esi
+; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi
+; CHECK-O0-NEXT: shlw $8, %si
+; CHECK-O0-NEXT: orw %si, %dx
+; CHECK-O0-NEXT: movw %dx, %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_combine:
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %cl
-; X86-NEXT: testb $1, %cl
+; X86-NEXT: sete %dl
+; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %cl
-; X86-NEXT: testb $1, %cl
+; X86-NEXT: sete %dl
+; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %cl
-; X86-NEXT: testb $1, %cl
+; X86-NEXT: sete %dl
+; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X64-NEXT: andl %edx, %ecx
; X64-NEXT: notl %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB5_2
; X64-NEXT: jmp .LBB5_1
;
; X86-LABEL: atomic_fetch_nand32:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl sc32, %ecx
; X86-NEXT: andl %edx, %ecx
; X86-NEXT: notl %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %cl
-; X86-NEXT: testb $1, %cl
+; X86-NEXT: sete %bl
+; X86-NEXT: testb $1, %bl
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB5_2
; X86-NEXT: jmp .LBB5_1
; X86-NEXT: .LBB5_2: # %atomicrmw.end
; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
ret void
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovgl %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB6_2
;
; X86-CMOV-LABEL: atomic_fetch_max32:
; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovgl %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %dl
-; X86-CMOV-NEXT: testb $1, %dl
+; X86-CMOV-NEXT: sete %bl
+; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_max32:
; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %dl
-; X86-NOCMOV-NEXT: testb $1, %dl
+; X86-NOCMOV-NEXT: sete %bl
+; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB6_2
; X86-NOCMOV-NEXT: jmp .LBB6_1
; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
+; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_max32:
; X86-NOX87: # %bb.0:
+; X86-NOX87-NEXT: pushl %ebx
; X86-NOX87-NEXT: pushl %esi
; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOX87-NEXT: movl %ecx, %eax
; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %dl
-; X86-NOX87-NEXT: testb $1, %dl
+; X86-NOX87-NEXT: sete %bl
+; X86-NOX87-NEXT: testb $1, %bl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB6_2
; X86-NOX87-NEXT: jmp .LBB6_1
; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end
; X86-NOX87-NEXT: addl $20, %esp
; X86-NOX87-NEXT: popl %esi
+; X86-NOX87-NEXT: popl %ebx
; X86-NOX87-NEXT: retl
%t1 = atomicrmw max i32* @sc32, i32 %x acquire
ret void
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovlel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB7_2
;
; X86-CMOV-LABEL: atomic_fetch_min32:
; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovlel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %dl
-; X86-CMOV-NEXT: testb $1, %dl
+; X86-CMOV-NEXT: sete %bl
+; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_min32:
; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %dl
-; X86-NOCMOV-NEXT: testb $1, %dl
+; X86-NOCMOV-NEXT: sete %bl
+; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB7_2
; X86-NOCMOV-NEXT: jmp .LBB7_1
; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
+; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_min32:
; X86-NOX87: # %bb.0:
+; X86-NOX87-NEXT: pushl %ebx
; X86-NOX87-NEXT: pushl %esi
; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOX87-NEXT: movl %ecx, %eax
; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %dl
-; X86-NOX87-NEXT: testb $1, %dl
+; X86-NOX87-NEXT: sete %bl
+; X86-NOX87-NEXT: testb $1, %bl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB7_2
; X86-NOX87-NEXT: jmp .LBB7_1
; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end
; X86-NOX87-NEXT: addl $20, %esp
; X86-NOX87-NEXT: popl %esi
+; X86-NOX87-NEXT: popl %ebx
; X86-NOX87-NEXT: retl
%t1 = atomicrmw min i32* @sc32, i32 %x acquire
ret void
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmoval %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2
;
; X86-CMOV-LABEL: atomic_fetch_umax32:
; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmoval %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %dl
-; X86-CMOV-NEXT: testb $1, %dl
+; X86-CMOV-NEXT: sete %bl
+; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_umax32:
; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %dl
-; X86-NOCMOV-NEXT: testb $1, %dl
+; X86-NOCMOV-NEXT: sete %bl
+; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB8_2
; X86-NOCMOV-NEXT: jmp .LBB8_1
; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
+; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_umax32:
; X86-NOX87: # %bb.0:
+; X86-NOX87-NEXT: pushl %ebx
; X86-NOX87-NEXT: pushl %esi
; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOX87-NEXT: movl %ecx, %eax
; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %dl
-; X86-NOX87-NEXT: testb $1, %dl
+; X86-NOX87-NEXT: sete %bl
+; X86-NOX87-NEXT: testb $1, %bl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB8_2
; X86-NOX87-NEXT: jmp .LBB8_1
; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end
; X86-NOX87-NEXT: addl $20, %esp
; X86-NOX87-NEXT: popl %esi
+; X86-NOX87-NEXT: popl %ebx
; X86-NOX87-NEXT: retl
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
ret void
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovbel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2
;
; X86-CMOV-LABEL: atomic_fetch_umin32:
; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovbel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %dl
-; X86-CMOV-NEXT: testb $1, %dl
+; X86-CMOV-NEXT: sete %bl
+; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_umin32:
; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %dl
-; X86-NOCMOV-NEXT: testb $1, %dl
+; X86-NOCMOV-NEXT: sete %bl
+; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB9_2
; X86-NOCMOV-NEXT: jmp .LBB9_1
; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
+; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
;
; X86-NOX87-LABEL: atomic_fetch_umin32:
; X86-NOX87: # %bb.0:
+; X86-NOX87-NEXT: pushl %ebx
; X86-NOX87-NEXT: pushl %esi
; X86-NOX87-NEXT: subl $20, %esp
; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOX87-NEXT: movl %ecx, %eax
; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %dl
-; X86-NOX87-NEXT: testb $1, %dl
+; X86-NOX87-NEXT: sete %bl
+; X86-NOX87-NEXT: testb $1, %bl
; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOX87-NEXT: jne .LBB9_2
; X86-NOX87-NEXT: jmp .LBB9_1
; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end
; X86-NOX87-NEXT: addl $20, %esp
; X86-NOX87-NEXT: popl %esi
+; X86-NOX87-NEXT: popl %ebx
; X86-NOX87-NEXT: retl
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
ret void
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $5, %ecx
-; X64-NEXT: # kill: def $rcx killed $ecx
-; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: orq $5, %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: xorq $5, %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: andq %rdx, %rcx
; X64-NEXT: notq %rcx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %cl
-; X64-NEXT: testb $1, %cl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB5_2
; X64-NEXT: jmp .LBB5_1
; X64-NEXT: subq %rdx, %rcx
; X64-NEXT: cmovgq %rax, %rdx
; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB6_2
; X64-NEXT: subq %rdx, %rcx
; X64-NEXT: cmovleq %rax, %rdx
; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB7_2
; X64-NEXT: subq %rdx, %rcx
; X64-NEXT: cmovaq %rax, %rdx
; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: subq %rdx, %rcx
; X64-NEXT: cmovbeq %rax, %rdx
; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %sil
+; X64-NEXT: testb $1, %sil
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: jne .LBB9_2
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
-; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
-; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all
; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0]
; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0
-; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0
+; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
; CHECK_O0-NEXT: # implicit-def: $rax
-; CHECK_O0-NEXT: # implicit-def: $ymm1
-; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax)
+; CHECK_O0-NEXT: # implicit-def: $ymm2
+; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax)
; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check
allocas:
br i1 undef, label %cif_mask_all, label %cif_mask_mixed
; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
-; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%b = load <8 x i32>, <8 x i32>* %bp, align 1
; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
-; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%b = load <4 x i64>, <4 x i64>* %bp, align 16
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
; CHECK-NEXT: kmovq %k0, %k1
-; CHECK-NEXT: kmovd %k0, %ecx
-; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
-; CHECK-NEXT: movl $4, %edx
-; CHECK-NEXT: movl %edx, %esi
+; CHECK-NEXT: kmovd %k0, %esi
+; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi
+; CHECK-NEXT: movzbl %sil, %edi
+; CHECK-NEXT: ## kill: def $di killed $di killed $edi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movq %rcx, %rdi
+; CHECK-NEXT: movl $4, %r8d
+; CHECK-NEXT: movl %r8d, %esi
+; CHECK-NEXT: movl %r8d, %edx
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax
-; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload
-; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload
+; CHECK-NEXT: movzwl %r9w, %edi
; CHECK-NEXT: movzwl %ax, %esi
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## kill: def $rax killed $eax
-; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: cqto
-; CHECK-NEXT: movslq %edi, %rcx
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
-; CHECK-NEXT: idivq (%rsi,%rcx,8)
+; CHECK-NEXT: movslq %edi, %rsi
+; CHECK-NEXT: idivq (%rcx,%rsi,8)
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%gep = getelementptr i64, i64* null, i32 %V
bb:
%tmp = load i32, i32* %p, align 4, !dbg !7
; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p)
- ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7
- ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7
+ ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7
+ ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7
switch i32 %tmp, label %bb7 [
i32 0, label %bb1
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xfloat:
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xdouble:
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt64xi8:
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi16:
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi32:
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
-; AVX1-NEXT: # implicit-def: $ymm2
-; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: $ymm1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi64:
; X64-NOOPT-NEXT: lfence
; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax
; X64-NOOPT-NEXT: lfence
-; X64-NOOPT-NEXT: movl (%rax), %eax
+; X64-NOOPT-NEXT: movl (%rax), %edx
; X64-NOOPT-NEXT: lfence
-; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NOOPT-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
if.end: ; preds = %if.then, %for.body
br label %for.inc
; CHECK-O0-LABEL: test_zero_ext:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %edx, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movq %rax, 8(%rcx)
+; CHECK-O0-NEXT: movl %eax, %r8d
+; CHECK-O0-NEXT: movq %r8, 8(%rcx)
; CHECK-O0-NEXT: jmp use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(271)* %i to i32*
; Test that null can be passed as a 32-bit pointer.
define dso_local void @test_null_arg(%struct.Foo* %f) {
-; CHECK-LABEL: test_null_arg:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $40, %rsp
-; CHECK: xorl %edx, %edx
-; CHECK-NEXT: callq test_noop1
-; CHECK-NEXT: nop
-; CHECK-NEXT: addq $40, %rsp
-; CHECK-NEXT: retq
-;
-; CHECK-O0-LABEL: test_null_arg:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: subq $40, %rsp
-; CHECK-O0: xorl %edx, %edx
-; CHECK-O0-NEXT: callq test_noop1
-; CHECK-O0-NEXT: nop
-; CHECK-O0-NEXT: addq $40, %rsp
-; CHECK-O0-NEXT: retq
+; ALL-LABEL: test_null_arg:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: subq $40, %rsp
+; ALL-NEXT: .seh_stackalloc 40
+; ALL-NEXT: .seh_endprologue
+; ALL-NEXT: xorl %edx, %edx
+; ALL-NEXT: callq test_noop1
+; ALL-NEXT: nop
+; ALL-NEXT: addq $40, %rsp
+; ALL-NEXT: retq
+; ALL-NEXT: .seh_handlerdata
+; ALL-NEXT: .text
+; ALL-NEXT: .seh_endproc
entry:
call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null)
ret void
; CHECK-O0-LABEL: test_unrecognized2:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %edx, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movq %rax, 16(%rcx)
+; CHECK-O0-NEXT: movl %eax, %r8d
+; CHECK-O0-NEXT: movq %r8, 16(%rcx)
; CHECK-O0-NEXT: jmp use_foo # TAILCALL
entry:
%0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(9)*
}
define i32 @test_load_sptr32(i32 addrspace(270)* %i) {
-; CHECK-LABEL: test_load_sptr32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movslq %ecx, %rax
-; CHECK-NEXT: movl (%rax), %eax
-; CHECK-NEXT: retq
-; CHECK-O0-LABEL: test_load_sptr32:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movslq %ecx, %rax
-; CHECK-O0-NEXT: movl (%rax), %eax
-; CHECK-O0-NEXT: retq
+; ALL-LABEL: test_load_sptr32:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movslq %ecx, %rax
+; ALL-NEXT: movl (%rax), %eax
+; ALL-NEXT: retq
entry:
%0 = load i32, i32 addrspace(270)* %i, align 4
ret i32 %0
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl (%rax), %eax
; CHECK-NEXT: retq
+;
; CHECK-O0-LABEL: test_load_uptr32:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %ecx, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movl (%rax), %eax
+; CHECK-O0-NEXT: movl %eax, %edx
+; CHECK-O0-NEXT: movl (%rdx), %eax
; CHECK-O0-NEXT: retq
entry:
%0 = load i32, i32 addrspace(271)* %i, align 4
}
define i32 @test_load_ptr64(i32 addrspace(272)* %i) {
-; CHECK-LABEL: test_load_ptr64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl (%rcx), %eax
-; CHECK-NEXT: retq
-; CHECK-O0-LABEL: test_load_ptr64:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl (%rcx), %eax
-; CHECK-O0-NEXT: retq
+; ALL-LABEL: test_load_ptr64:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movl (%rcx), %eax
+; ALL-NEXT: retq
entry:
%0 = load i32, i32 addrspace(272)* %i, align 8
ret i32 %0
}
define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) {
-; CHECK-LABEL: test_store_sptr32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movslq %ecx, %rax
-; CHECK-NEXT: movl %edx, (%rax)
-; CHECK-NEXT: retq
-; CHECK-O0-LABEL: test_store_sptr32:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movslq %ecx, %rax
-; CHECK-O0-NEXT: movl %edx, (%rax)
-; CHECK-O0-NEXT: retq
+; ALL-LABEL: test_store_sptr32:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movslq %ecx, %rax
+; ALL-NEXT: movl %edx, (%rax)
+; ALL-NEXT: retq
entry:
store i32 %i, i32 addrspace(270)* %s, align 4
ret void
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, (%rax)
; CHECK-NEXT: retq
+;
; CHECK-O0-LABEL: test_store_uptr32:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: movl %ecx, %eax
-; CHECK-O0-NEXT: # kill: def $rax killed $eax
-; CHECK-O0-NEXT: movl %edx, (%rax)
+; CHECK-O0-NEXT: movl %eax, %r8d
+; CHECK-O0-NEXT: movl %edx, (%r8)
; CHECK-O0-NEXT: retq
entry:
store i32 %i, i32 addrspace(271)* %s, align 4
}
define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) {
-; CHECK-LABEL: test_store_ptr64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl %edx, (%rcx)
-; CHECK-NEXT: retq
-; CHECK-O0-LABEL: test_store_ptr64:
-; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: movl %edx, (%rcx)
-; CHECK-O0-NEXT: retq
+; ALL-LABEL: test_store_ptr64:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: movl %edx, (%rcx)
+; ALL-NEXT: retq
entry:
store i32 %i, i32 addrspace(272)* %s, align 8
ret void
; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
; CHECK-NEXT: calll _lrintf
; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %al
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setl %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB
; CHECK-NEXT: calll _lrint
; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %al
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setl %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
; CHECK-NEXT: calll _lrintf
; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %al
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setl %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
; CHECK-NEXT: calll _lrintf
; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %al
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setl %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $0, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: callq callee1
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $0, %edi
; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: negl %eax
-; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: negl %ecx
+; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: callq callee2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; CHECK-NEXT: # implicit-def: $ymm2
; CHECK-NEXT: vmovaps %xmm1, %xmm2
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
; CHECK-NEXT: # implicit-def: $ymm3
-; CHECK-NEXT: vmovaps %xmm2, %xmm3
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; CHECK-NEXT: # implicit-def: $zmm2
-; CHECK-NEXT: vmovaps %ymm1, %ymm2
-; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
-; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovaps %xmm1, %xmm3
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
+; CHECK-NEXT: # implicit-def: $zmm24
+; CHECK-NEXT: vmovaps %zmm3, %zmm24
+; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
+; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s
; CHECK: patatino:
; CHECK: .cfi_startproc
-; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]]
-; CHECK: movq %r[[REG0]], ({{%r[abcd]x}})
+; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]]
+; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x
+; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}})
; CHECK: retq
define void @patatino() {
; CHECK-NEXT: .LBB0_2: # %lor.end
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: cmpl %eax, %ecx
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT: cmpl %ecx, %edx
; CHECK-NEXT: setl %al
; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: xorl $-1, %eax
-; CHECK-NEXT: cmpl $0, %eax
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: xorl $-1, %ecx
+; CHECK-NEXT: cmpl $0, %ecx
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: .LBB0_4: # %lor.end5
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
; X86-O0-LABEL: foo:
; X86-O0: # %bb.0: # %entry
; X86-O0-NEXT: xorl %eax, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
-; X86-O0-NEXT: xorl %ecx, %ecx
+; X86-O0-NEXT: movl %eax, %ecx
+; X86-O0-NEXT: xorl %eax, %eax
; X86-O0-NEXT: movzbl c, %edx
-; X86-O0-NEXT: subl %edx, %ecx
-; X86-O0-NEXT: movslq %ecx, %rcx
-; X86-O0-NEXT: subq %rcx, %rax
-; X86-O0-NEXT: # kill: def $al killed $al killed $rax
-; X86-O0-NEXT: cmpb $0, %al
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: subl %edx, %eax
+; X86-O0-NEXT: movslq %eax, %rsi
+; X86-O0-NEXT: subq %rsi, %rcx
+; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx
+; X86-O0-NEXT: cmpb $0, %cl
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: cmpb $0, c
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: xorb $-1, %al
-; X86-O0-NEXT: xorb $-1, %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: movzbl c, %ecx
-; X86-O0-NEXT: cmpl %ecx, %eax
-; X86-O0-NEXT: setle %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %eax
+; X86-O0-NEXT: movzbl c, %edx
+; X86-O0-NEXT: cmpl %edx, %eax
+; X86-O0-NEXT: setle %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %eax
; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: retq
;
; 686-O0-NEXT: xorb $-1, %al
; 686-O0-NEXT: xorb $-1, %al
; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movzbl c, %ecx
-; 686-O0-NEXT: cmpl %ecx, %eax
+; 686-O0-NEXT: movzbl %al, %ecx
+; 686-O0-NEXT: movzbl c, %edx
+; 686-O0-NEXT: cmpl %edx, %ecx
; 686-O0-NEXT: setle %al
; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movl %eax, (%esp)
+; 686-O0-NEXT: movzbl %al, %ecx
+; 686-O0-NEXT: movl %ecx, (%esp)
; 686-O0-NEXT: addl $8, %esp
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5
; X86-O0-NEXT: addq %rcx, %rax
; X86-O0-NEXT: cmpq $0, %rax
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X86-O0-NEXT: movl var_5, %eax
-; X86-O0-NEXT: xorl $-1, %eax
-; X86-O0-NEXT: cmpl $0, %eax
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: xorb $-1, %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: setne %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: movl var_5, %esi
+; X86-O0-NEXT: xorl $-1, %esi
+; X86-O0-NEXT: cmpl $0, %esi
+; X86-O0-NEXT: setne %dl
+; X86-O0-NEXT: xorb $-1, %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movzbl %dl, %esi
+; X86-O0-NEXT: movl %esi, %eax
; X86-O0-NEXT: movslq var_5, %rcx
; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5
; X86-O0-NEXT: cmpq %rcx, %rax
-; X86-O0-NEXT: setg %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: setg %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movzbl %dl, %esi
+; X86-O0-NEXT: movl %esi, %eax
; X86-O0-NEXT: movq %rax, var_57
-; X86-O0-NEXT: movl var_5, %eax
-; X86-O0-NEXT: xorl $-1, %eax
-; X86-O0-NEXT: cmpl $0, %eax
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: xorb $-1, %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: movl var_5, %esi
+; X86-O0-NEXT: xorl $-1, %esi
+; X86-O0-NEXT: cmpl $0, %esi
+; X86-O0-NEXT: setne %dl
+; X86-O0-NEXT: xorb $-1, %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movzbl %dl, %esi
+; X86-O0-NEXT: movl %esi, %eax
; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E
; X86-O0-NEXT: retq
;
;
; 686-O0-LABEL: f1:
; 686-O0: # %bb.0: # %entry
-; 686-O0-NEXT: pushl %ebx
+; 686-O0-NEXT: pushl %ebp
; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: pushl %edi
+; 686-O0-NEXT: pushl %ebx
; 686-O0-NEXT: .cfi_def_cfa_offset 12
-; 686-O0-NEXT: pushl %esi
+; 686-O0-NEXT: pushl %edi
; 686-O0-NEXT: .cfi_def_cfa_offset 16
+; 686-O0-NEXT: pushl %esi
+; 686-O0-NEXT: .cfi_def_cfa_offset 20
; 686-O0-NEXT: subl $1, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 17
-; 686-O0-NEXT: .cfi_offset %esi, -16
-; 686-O0-NEXT: .cfi_offset %edi, -12
-; 686-O0-NEXT: .cfi_offset %ebx, -8
+; 686-O0-NEXT: .cfi_def_cfa_offset 21
+; 686-O0-NEXT: .cfi_offset %esi, -20
+; 686-O0-NEXT: .cfi_offset %edi, -16
+; 686-O0-NEXT: .cfi_offset %ebx, -12
+; 686-O0-NEXT: .cfi_offset %ebp, -8
; 686-O0-NEXT: movl var_5, %eax
; 686-O0-NEXT: movl %eax, %ecx
; 686-O0-NEXT: sarl $31, %ecx
; 686-O0-NEXT: movl var_5, %edi
; 686-O0-NEXT: subl $-1, %edi
; 686-O0-NEXT: sete %bl
-; 686-O0-NEXT: movzbl %bl, %ebx
-; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E
+; 686-O0-NEXT: movzbl %bl, %ebp
+; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
; 686-O0-NEXT: addl $1, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 16
+; 686-O0-NEXT: .cfi_def_cfa_offset 20
; 686-O0-NEXT: popl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
+; 686-O0-NEXT: .cfi_def_cfa_offset 16
; 686-O0-NEXT: popl %edi
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
; 686-O0-NEXT: popl %ebx
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: popl %ebp
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
;
; X86-O0-NEXT: setne %cl
; X86-O0-NEXT: xorb $-1, %cl
; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %ecx
-; X86-O0-NEXT: xorl %ecx, %eax
+; X86-O0-NEXT: movzbl %cl, %edx
+; X86-O0-NEXT: xorl %edx, %eax
; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X86-O0-NEXT: movzbl var_7, %eax
-; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-O0-NEXT: cmpw $0, %ax
-; X86-O0-NEXT: setne %al
-; X86-O0-NEXT: xorb $-1, %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: movzbl var_7, %ecx
-; X86-O0-NEXT: cmpl %ecx, %eax
-; X86-O0-NEXT: sete %al
-; X86-O0-NEXT: andb $1, %al
-; X86-O0-NEXT: movzbl %al, %eax
-; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-O0-NEXT: # implicit-def: $rcx
-; X86-O0-NEXT: movw %ax, (%rcx)
+; X86-O0-NEXT: movzbl var_7, %edx
+; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx
+; X86-O0-NEXT: cmpw $0, %dx
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %esi
+; X86-O0-NEXT: movzbl var_7, %edi
+; X86-O0-NEXT: cmpl %edi, %esi
+; X86-O0-NEXT: sete %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %esi
+; X86-O0-NEXT: # kill: def $si killed $si killed $esi
+; X86-O0-NEXT: # implicit-def: $r8
+; X86-O0-NEXT: movw %si, (%r8)
; X86-O0-NEXT: retq
;
; X64-LABEL: f2:
;
; 686-O0-LABEL: f2:
; 686-O0: # %bb.0: # %entry
+; 686-O0-NEXT: pushl %edi
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: pushl %esi
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
; 686-O0-NEXT: subl $2, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 6
+; 686-O0-NEXT: .cfi_def_cfa_offset 14
+; 686-O0-NEXT: .cfi_offset %esi, -12
+; 686-O0-NEXT: .cfi_offset %edi, -8
; 686-O0-NEXT: movzbl var_7, %eax
; 686-O0-NEXT: cmpb $0, var_7
; 686-O0-NEXT: setne %cl
; 686-O0-NEXT: xorb $-1, %cl
; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %ecx
-; 686-O0-NEXT: xorl %ecx, %eax
+; 686-O0-NEXT: movzbl %cl, %edx
+; 686-O0-NEXT: xorl %edx, %eax
; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
; 686-O0-NEXT: movw %ax, (%esp)
-; 686-O0-NEXT: movzbl var_7, %eax
-; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; 686-O0-NEXT: cmpw $0, %ax
-; 686-O0-NEXT: setne %al
-; 686-O0-NEXT: xorb $-1, %al
-; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movzbl var_7, %ecx
-; 686-O0-NEXT: cmpl %ecx, %eax
-; 686-O0-NEXT: sete %al
-; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; 686-O0-NEXT: # implicit-def: $ecx
-; 686-O0-NEXT: movw %ax, (%ecx)
+; 686-O0-NEXT: movzbl var_7, %edx
+; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx
+; 686-O0-NEXT: cmpw $0, %dx
+; 686-O0-NEXT: setne %cl
+; 686-O0-NEXT: xorb $-1, %cl
+; 686-O0-NEXT: andb $1, %cl
+; 686-O0-NEXT: movzbl %cl, %esi
+; 686-O0-NEXT: movzbl var_7, %edi
+; 686-O0-NEXT: cmpl %edi, %esi
+; 686-O0-NEXT: sete %cl
+; 686-O0-NEXT: andb $1, %cl
+; 686-O0-NEXT: movzbl %cl, %esi
+; 686-O0-NEXT: # kill: def $si killed $si killed $esi
+; 686-O0-NEXT: # implicit-def: $edi
+; 686-O0-NEXT: movw %si, (%edi)
; 686-O0-NEXT: addl $2, %esp
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
+; 686-O0-NEXT: popl %esi
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
+; 686-O0-NEXT: popl %edi
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
;
; X86-O0-NEXT: movl var_13, %eax
; X86-O0-NEXT: xorl $-1, %eax
; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: movl %eax, %ecx
; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %ecx
-; X86-O0-NEXT: # kill: def $rcx killed $ecx
-; X86-O0-NEXT: movl var_13, %edx
-; X86-O0-NEXT: xorl $-1, %edx
-; X86-O0-NEXT: xorl var_16, %edx
-; X86-O0-NEXT: movl %edx, %edx
-; X86-O0-NEXT: # kill: def $rdx killed $edx
-; X86-O0-NEXT: andq %rdx, %rcx
-; X86-O0-NEXT: orq %rcx, %rax
-; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: setne %dl
+; X86-O0-NEXT: xorb $-1, %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movzbl %dl, %eax
+; X86-O0-NEXT: movl %eax, %esi
; X86-O0-NEXT: movl var_13, %eax
; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: xorl var_16, %eax
; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: movl %eax, %edi
+; X86-O0-NEXT: andq %rdi, %rsi
+; X86-O0-NEXT: orq %rsi, %rcx
+; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: movl var_13, %eax
+; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: movl %eax, %eax
+; X86-O0-NEXT: movl %eax, %ecx
; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %ecx
-; X86-O0-NEXT: # kill: def $rcx killed $ecx
-; X86-O0-NEXT: andq $0, %rcx
-; X86-O0-NEXT: orq %rcx, %rax
-; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax
-; X86-O0-NEXT: movl %eax, var_46
+; X86-O0-NEXT: setne %dl
+; X86-O0-NEXT: xorb $-1, %dl
+; X86-O0-NEXT: andb $1, %dl
+; X86-O0-NEXT: movzbl %dl, %eax
+; X86-O0-NEXT: movl %eax, %esi
+; X86-O0-NEXT: andq $0, %rsi
+; X86-O0-NEXT: orq %rsi, %rcx
+; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx
+; X86-O0-NEXT: movl %ecx, var_46
; X86-O0-NEXT: retq
;
; X64-LABEL: f3:
; 686-O0-NEXT: .cfi_offset %ebp, -8
; 686-O0-NEXT: movl %esp, %ebp
; 686-O0-NEXT: .cfi_def_cfa_register %ebp
+; 686-O0-NEXT: pushl %edi
; 686-O0-NEXT: pushl %esi
; 686-O0-NEXT: andl $-8, %esp
-; 686-O0-NEXT: subl $16, %esp
-; 686-O0-NEXT: .cfi_offset %esi, -12
+; 686-O0-NEXT: subl $8, %esp
+; 686-O0-NEXT: .cfi_offset %esi, -16
+; 686-O0-NEXT: .cfi_offset %edi, -12
; 686-O0-NEXT: movl var_13, %eax
; 686-O0-NEXT: movl %eax, %ecx
; 686-O0-NEXT: notl %ecx
; 686-O0-NEXT: testl %eax, %eax
-; 686-O0-NEXT: sete %al
-; 686-O0-NEXT: movzbl %al, %eax
-; 686-O0-NEXT: movl var_16, %edx
-; 686-O0-NEXT: movl %ecx, %esi
-; 686-O0-NEXT: xorl %edx, %esi
-; 686-O0-NEXT: andl %esi, %eax
+; 686-O0-NEXT: sete %dl
+; 686-O0-NEXT: movzbl %dl, %eax
+; 686-O0-NEXT: movl var_16, %esi
+; 686-O0-NEXT: movl %ecx, %edi
+; 686-O0-NEXT: xorl %esi, %edi
+; 686-O0-NEXT: andl %edi, %eax
; 686-O0-NEXT: orl %eax, %ecx
; 686-O0-NEXT: movl %ecx, (%esp)
; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp)
; 686-O0-NEXT: movl var_13, %eax
; 686-O0-NEXT: notl %eax
; 686-O0-NEXT: movl %eax, var_46
-; 686-O0-NEXT: leal -4(%ebp), %esp
+; 686-O0-NEXT: leal -8(%ebp), %esp
; 686-O0-NEXT: popl %esi
+; 686-O0-NEXT: popl %edi
; 686-O0-NEXT: popl %ebp
; 686-O0-NEXT: .cfi_def_cfa %esp, 4
; 686-O0-NEXT: retl
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: # kill: def $rax killed $eax
+; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movw $0, var_825
-; X64-NEXT: movzwl var_32, %ecx
+; X64-NEXT: movzwl var_32, %eax
; X64-NEXT: movzwl var_901, %edx
-; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: movl %eax, %esi
; X64-NEXT: xorl %edx, %esi
-; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: movl %eax, %edx
; X64-NEXT: xorl %esi, %edx
-; X64-NEXT: addl %ecx, %edx
-; X64-NEXT: movslq %edx, %rcx
-; X64-NEXT: movq %rcx, var_826
-; X64-NEXT: movzwl var_32, %ecx
-; X64-NEXT: # kill: def $rcx killed $ecx
-; X64-NEXT: movzwl var_901, %edx
-; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D
-; X64-NEXT: movslq %edx, %rdx
-; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440
-; X64-NEXT: xorq %rsi, %rdx
-; X64-NEXT: movq %rcx, %rsi
-; X64-NEXT: xorq %rdx, %rsi
-; X64-NEXT: xorq $-1, %rsi
-; X64-NEXT: xorq %rsi, %rcx
-; X64-NEXT: movq %rcx, %rdx
-; X64-NEXT: orq var_57, %rdx
-; X64-NEXT: orq %rdx, %rcx
-; X64-NEXT: # kill: def $cx killed $cx killed $rcx
-; X64-NEXT: movw %cx, var_900
-; X64-NEXT: cmpq var_28, %rax
-; X64-NEXT: setne %al
-; X64-NEXT: andb $1, %al
-; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: addl %eax, %edx
+; X64-NEXT: movslq %edx, %rdi
+; X64-NEXT: movq %rdi, var_826
+; X64-NEXT: movzwl var_32, %eax
+; X64-NEXT: movl %eax, %edi
+; X64-NEXT: movzwl var_901, %eax
+; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D
+; X64-NEXT: movslq %eax, %r8
+; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440
+; X64-NEXT: xorq %r9, %r8
+; X64-NEXT: movq %rdi, %r9
+; X64-NEXT: xorq %r8, %r9
+; X64-NEXT: xorq $-1, %r9
+; X64-NEXT: xorq %r9, %rdi
+; X64-NEXT: movq %rdi, %r8
+; X64-NEXT: orq var_57, %r8
+; X64-NEXT: orq %r8, %rdi
+; X64-NEXT: # kill: def $di killed $di killed $rdi
+; X64-NEXT: movw %di, var_900
+; X64-NEXT: cmpq var_28, %rcx
+; X64-NEXT: setne %r10b
+; X64-NEXT: andb $1, %r10b
+; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: movw %ax, var_827
; X64-NEXT: retq
; X640-NEXT: xorl %ecx, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: cltq
-; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X640-NEXT: movslq %eax, %rdx
+; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; X640-NEXT: movzwl var_22, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: cltq
-; X640-NEXT: movzwl var_27, %ecx
-; X640-NEXT: subl $16610, %ecx # imm = 0x40E2
-; X640-NEXT: movl %ecx, %ecx
-; X640-NEXT: # kill: def $rcx killed $ecx
+; X640-NEXT: movslq %eax, %rdx
+; X640-NEXT: movzwl var_27, %eax
+; X640-NEXT: subl $16610, %eax # imm = 0x40E2
+; X640-NEXT: movl %eax, %eax
+; X640-NEXT: movl %eax, %ecx
; X640-NEXT: # kill: def $cl killed $rcx
-; X640-NEXT: sarq %cl, %rax
-; X640-NEXT: # kill: def $al killed $al killed $rax
-; X640-NEXT: # implicit-def: $rcx
-; X640-NEXT: movb %al, (%rcx)
+; X640-NEXT: sarq %cl, %rdx
+; X640-NEXT: # kill: def $dl killed $dl killed $rdx
+; X640-NEXT: # implicit-def: $rsi
+; X640-NEXT: movb %dl, (%rsi)
; X640-NEXT: retq
;
; 6860-LABEL: foo:
; 6860-NEXT: .cfi_offset %ebp, -8
; 6860-NEXT: movl %esp, %ebp
; 6860-NEXT: .cfi_def_cfa_register %ebp
+; 6860-NEXT: pushl %ebx
+; 6860-NEXT: pushl %edi
+; 6860-NEXT: pushl %esi
; 6860-NEXT: andl $-8, %esp
-; 6860-NEXT: subl $24, %esp
+; 6860-NEXT: subl $32, %esp
+; 6860-NEXT: .cfi_offset %esi, -20
+; 6860-NEXT: .cfi_offset %edi, -16
+; 6860-NEXT: .cfi_offset %ebx, -12
; 6860-NEXT: movw var_22, %ax
; 6860-NEXT: movzwl var_27, %ecx
; 6860-NEXT: movw %cx, %dx
; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $edx
-; 6860-NEXT: movw %ax, %dx
-; 6860-NEXT: xorl %ecx, %edx
-; 6860-NEXT: # kill: def $dx killed $dx killed $edx
-; 6860-NEXT: movzwl %dx, %eax
-; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; 6860-NEXT: # implicit-def: $esi
+; 6860-NEXT: movw %ax, %si
+; 6860-NEXT: xorl %ecx, %esi
+; 6860-NEXT: # kill: def $si killed $si killed $esi
+; 6860-NEXT: movzwl %si, %ecx
+; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; 6860-NEXT: movl $0, {{[0-9]+}}(%esp)
; 6860-NEXT: movw var_22, %ax
; 6860-NEXT: movzwl var_27, %ecx
; 6860-NEXT: movw %cx, %dx
; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $edx
-; 6860-NEXT: movw %ax, %dx
-; 6860-NEXT: xorl %ecx, %edx
-; 6860-NEXT: # kill: def $dx killed $dx killed $edx
-; 6860-NEXT: movzwl %dx, %eax
+; 6860-NEXT: # implicit-def: $edi
+; 6860-NEXT: movw %ax, %di
+; 6860-NEXT: xorl %ecx, %edi
+; 6860-NEXT: # kill: def $di killed $di killed $edi
+; 6860-NEXT: movzwl %di, %ebx
; 6860-NEXT: # kill: def $cl killed $cl killed $ecx
; 6860-NEXT: addb $30, %cl
-; 6860-NEXT: xorl %edx, %edx
+; 6860-NEXT: xorl %eax, %eax
; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; 6860-NEXT: shrdl %cl, %edx, %eax
+; 6860-NEXT: shrdl %cl, %eax, %ebx
; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; 6860-NEXT: testb $32, %cl
+; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: jne .LBB0_2
; 6860-NEXT: # %bb.1: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; 6860-NEXT: # kill: def $al killed $al killed $eax
; 6860-NEXT: # implicit-def: $ecx
; 6860-NEXT: movb %al, (%ecx)
-; 6860-NEXT: movl %ebp, %esp
+; 6860-NEXT: leal -12(%ebp), %esp
+; 6860-NEXT: popl %esi
+; 6860-NEXT: popl %edi
+; 6860-NEXT: popl %ebx
; 6860-NEXT: popl %ebp
; 6860-NEXT: .cfi_def_cfa %esp, 4
; 6860-NEXT: retl
define i8** @japi1_convert_690(i8**, i8***, i32) {
; CHECK-LABEL: japi1_convert_690:
; CHECK: # %bb.0: # %top
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset %ebx, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; CHECK-NEXT: calll julia.gc_root_decl
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; CHECK-NEXT: calll jl_get_ptls_states
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl 4(%ecx), %edx
-; CHECK-NEXT: movb (%edx), %dl
-; CHECK-NEXT: andb $1, %dl
-; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: movb (%edx), %bl
+; CHECK-NEXT: andb $1, %bl
+; CHECK-NEXT: movzbl %bl, %edx
; CHECK-NEXT: movl %edx, (%esp)
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; CHECK-NEXT: calll jl_box_int32
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
top:
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $160, %rsp
+; CHECK-NEXT: subq $192, %rsp
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7]
-; CHECK-NEXT: vmovaps %xmm7, %xmm2
-; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: # implicit-def: $ymm9
-; CHECK-NEXT: vmovaps %xmm2, %xmm9
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
-; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7]
+; CHECK-NEXT: vmovaps %xmm7, %xmm9
+; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: # implicit-def: $ymm2
+; CHECK-NEXT: vmovaps %xmm9, %xmm2
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
+; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3]
; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero
; CHECK-NEXT: # implicit-def: $ymm8
; CHECK-NEXT: vmovaps %xmm7, %xmm8
-; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1]
; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm5, %ymm1
+; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm6, %ymm2
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm9, %ymm3
+; CHECK-NEXT: vmovaps %ymm5, %ymm3
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: vmovaps %xmm1, %xmm2
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; CHECK-NEXT: vmovdqa %ymm0, (%rsp)
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
+; CHECK-NEXT: vmovdqa %ymm2, (%rsp)
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: callq _b
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
-; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax
-; CHECK-NEXT: subq $-1, %rax
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: ## kill: def $rcx killed $ecx
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
+; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx
+; CHECK-NEXT: subq $-1, %rcx
+; CHECK-NEXT: setne %dl
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: setae %cl
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: ## kill: def $rcx killed $ecx
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
+; CHECK-NEXT: setae %dl
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvttsd2si %xmm0, %ecx
-; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
-; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
entry:
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $124, %esp
-; CHECK-NEXT: movl 144(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movw 176(%esp), %dx
-; CHECK-NEXT: movw 172(%esp), %si
-; CHECK-NEXT: movw 168(%esp), %di
-; CHECK-NEXT: movw 164(%esp), %bx
-; CHECK-NEXT: movw 160(%esp), %bp
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp
+; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
+; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
+; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
+; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
+; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
+; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload
+; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movw 156(%esp), %ax
-; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw 152(%esp), %ax
-; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw 148(%esp), %ax
-; CHECK-NEXT: movw %ax, 112(%esp)
-; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
-; CHECK-NEXT: movw %ax, 114(%esp)
-; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
-; CHECK-NEXT: movw %ax, 116(%esp)
-; CHECK-NEXT: movw %bp, 118(%esp)
-; CHECK-NEXT: movw %dx, 110(%esp)
-; CHECK-NEXT: movw %si, 108(%esp)
-; CHECK-NEXT: movw %di, 106(%esp)
-; CHECK-NEXT: movw %bx, 104(%esp)
-; CHECK-NEXT: movzwl 118(%esp), %edx
-; CHECK-NEXT: movzwl 116(%esp), %esi
-; CHECK-NEXT: movzwl 114(%esp), %edi
-; CHECK-NEXT: movzwl 112(%esp), %ebx
-; CHECK-NEXT: movzwl 110(%esp), %ebp
-; CHECK-NEXT: movzwl 108(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl 106(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movzwl 104(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl %ebx, (%eax)
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __gnu_f2h_ieee
; CHECK-NEXT: movl %esp, %ecx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps 4(%ecx)
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll fmodf
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll __gnu_f2h_ieee
; CHECK-NEXT: movl %esp, %ecx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps 4(%ecx)
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll fmodf
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll __gnu_f2h_ieee
; CHECK-NEXT: movl %esp, %ecx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; CHECK-NEXT: movl %ecx, (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, (%ecx)
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: calll __gnu_h2f_ieee
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps 4(%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps 4(%ecx)
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll fmodf
-; CHECK-NEXT: movl %esp, %eax
-; CHECK-NEXT: fstps (%eax)
+; CHECK-NEXT: movl %esp, %ecx
+; CHECK-NEXT: fstps (%ecx)
; CHECK-NEXT: calll __gnu_f2h_ieee
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movw %ax, 6(%ecx)
; CHECK-NEXT: movw %ax, 4(%ecx)
; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
; CHECK-NEXT: movw %dx, 2(%ecx)
-; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload
-; CHECK-NEXT: movw %si, (%ecx)
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload
+; CHECK-NEXT: movw %bp, (%ecx)
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: addl $124, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags
- ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+ ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
- ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0)
+ ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0)
; CHECK: bb.3:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags
- ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
- ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1)
+ ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+ ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1)
; CHECK: JMP64r killed renamable $rax
bb.0:
liveins: $edi, $rsi
; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-O0-NEXT: callq gen
-; CHECK-O0-NEXT: cwtl
-; CHECK-O0-NEXT: movsbl %dl, %ecx
-; CHECK-O0-NEXT: addl %ecx, %eax
-; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-O0-NEXT: movswl %ax, %ecx
+; CHECK-O0-NEXT: movsbl %dl, %esi
+; CHECK-O0-NEXT: addl %esi, %ecx
+; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
+; CHECK-O0-NEXT: movw %cx, %ax
; CHECK-O0-NEXT: popq %rcx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-O0-NEXT: movq %rsp, %rax
; CHECK-O0-NEXT: callq gen2
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx
-; CHECK-O0-NEXT: movl (%rsp), %esi
-; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi
-; CHECK-O0-NEXT: addl %edi, %esi
-; CHECK-O0-NEXT: addl %edx, %esi
-; CHECK-O0-NEXT: addl %ecx, %esi
-; CHECK-O0-NEXT: addl %eax, %esi
-; CHECK-O0-NEXT: movl %esi, %eax
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi
+; CHECK-O0-NEXT: movl (%rsp), %edi
+; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-O0-NEXT: addl %r8d, %edi
+; CHECK-O0-NEXT: addl %esi, %edi
+; CHECK-O0-NEXT: addl %edx, %edi
+; CHECK-O0-NEXT: addl %ecx, %edi
+; CHECK-O0-NEXT: movl %edi, %eax
; CHECK-O0-NEXT: addq $24, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: callq produce_i1_ret
; CHECK-O0-NEXT: andb $1, %al
-; CHECK-O0-NEXT: movzbl %al, %eax
-; CHECK-O0-NEXT: movl %eax, var
+; CHECK-O0-NEXT: movzbl %al, %esi
+; CHECK-O0-NEXT: movl %esi, var
; CHECK-O0-NEXT: andb $1, %dl
-; CHECK-O0-NEXT: movzbl %dl, %eax
-; CHECK-O0-NEXT: movl %eax, var
+; CHECK-O0-NEXT: movzbl %dl, %esi
+; CHECK-O0-NEXT: movl %esi, var
; CHECK-O0-NEXT: andb $1, %cl
-; CHECK-O0-NEXT: movzbl %cl, %eax
-; CHECK-O0-NEXT: movl %eax, var
+; CHECK-O0-NEXT: movzbl %cl, %esi
+; CHECK-O0-NEXT: movl %esi, var
; CHECK-O0-NEXT: andb $1, %r8b
-; CHECK-O0-NEXT: movzbl %r8b, %eax
-; CHECK-O0-NEXT: movl %eax, var
+; CHECK-O0-NEXT: movzbl %r8b, %esi
+; CHECK-O0-NEXT: movl %esi, var
; CHECK-O0-NEXT: popq %rax
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
; CHECK-O0-LABEL: testAssign4
; CHECK-O0: callq _foo2
; CHECK-O0: xorl %eax, %eax
-; CHECK-O0: ## kill: def $rax killed $eax
-; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]]
+; CHECK-O0: movl %eax, %ecx
+; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]]
; CHECK-O0: movq [[SLOT]], %rax
; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]
; CHECK-O0: movq [[SLOT2]], %r12
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3
; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000
-; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref
+; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref
; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000
-; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref
+; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla")
; Check the DEBUG_VALUE comments for good measure.
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
; vla should have a register-indirect address at one point.
-; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0]
-; ASM-CHECK: DW_OP_breg2
+; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0]
+; ASM-CHECK: DW_OP_breg1
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT
; PRETTY-PRINT: DIExpression(DW_OP_deref)