// Return the matching instruction if one is found, else MBB->end().
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
LdStPairFlags &Flags,
- unsigned Limit);
+ unsigned Limit,
+ bool FindNarrowMerge);
// Scan the instructions looking for a store that writes to the address from
// which the current load instruction reads. Return true if one is found.
MergeMI->eraseFromParent();
return NextI;
}
- assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
+ assert(isPromotableZeroStoreInst(I) && isPromotableZeroStoreInst(MergeMI) &&
+ "Expected promotable zero store");
// Construct the new instruction.
MachineInstrBuilder MIB;
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- LdStPairFlags &Flags, unsigned Limit) {
+ LdStPairFlags &Flags, unsigned Limit,
+ bool FindNarrowMerge) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
// safely transform. Similarly, stop if we see a hint to avoid pairs.
if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
return E;
- // If the resultant immediate offset of merging these instructions
- // is out of range for a pairwise instruction, bail and keep looking.
- bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
- if (!IsNarrowLoad &&
- !inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- MemInsns.push_back(MI);
- continue;
- }
- if (IsNarrowLoad || IsPromotableZeroStore) {
+ if (FindNarrowMerge) {
// If the alignment requirements of the scaled wide load/store
- // instruction can't express the offset of the scaled narrow
- // input, bail and keep looking.
- if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
+ // instruction can't express the offset of the scaled narrow input,
+ // bail and keep looking. For promotable zero stores, allow only when
+ // the stored value is the same (i.e., WZR).
+ if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
+ (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(MI);
continue;
}
} else {
+ // If the resultant immediate offset of merging these instructions
+ // is out of range for a pairwise instruction, bail and keep looking.
+ if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ MemInsns.push_back(MI);
+ continue;
+ }
// If the alignment requirements of the paired (scaled) instruction
// can't express the offset of the unscaled input, bail and keep
// looking.
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- // For narrow stores, allow only when the stored value is the same
- // (i.e., WZR).
- if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
- (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
+ if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(MI);
continue;
// Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
MachineBasicBlock::iterator MergeMI =
- findMatchingInsn(MBBI, Flags, LdStLimit);
+ findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
if (MergeMI != E) {
if (isNarrowLoad(MI)) {
++NumNarrowLoadsPromoted;
// Look ahead up to LdStLimit instructions for a pairable instruction.
LdStPairFlags Flags;
- MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, LdStLimit);
+ MachineBasicBlock::iterator Paired =
+ findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
if (Paired != E) {
++NumPairCreated;
if (TII->isUnscaledLdSt(MI))
ret void
}
-;CHECK-LABEL: Strw_zero
-;CHECK : str xzr
+; CHECK-LABEL: Strw_zero
+; CHECK: str xzr
define void @Strw_zero(i32* nocapture %P, i32 %n) {
entry:
%idxprom = sext i32 %n to i64
ret void
}
-;CHECK-LABEL: Strw_zero_4
-;CHECK : stp xzr
+; CHECK-LABEL: Strw_zero_nonzero
+; CHECK: stp wzr, w1
+define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 %n, i32* %arrayidx2
+ ret void
+}
+
+; CHECK-LABEL: Strw_zero_4
+; CHECK: stp xzr
define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
entry:
%idxprom = sext i32 %n to i64
ret void
}
-;CHECK-LABEL: Sturw_zero
-;CHECK : stur xzr
+; CHECK-LABEL: Sturw_zero
+; CHECK: stur xzr
define void @Sturw_zero(i32* nocapture %P, i32 %n) {
entry:
%sub = add nsw i32 %n, -3
ret void
}
-;CHECK-LABEL: Sturw_zero_4
-;CHECK : str xzr
+; CHECK-LABEL: Sturw_zero_4
+; CHECK: stp xzr, xzr
define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
entry:
%sub = add nsw i32 %n, -3