// Is V an instruction whose result can be trivially promoted, or that has
// safe wrapping.
bool isLegalToPromote(Value *V);
- bool TryToPromote(Value *V, unsigned PromotedWidth);
+ bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI);
public:
static char ID;
return false;
}
-bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
+bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth,
+ const LoopInfo &LI) {
Type *OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
SafeToPromote.clear();
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
+ unsigned NonLoopSources = 0, LoopSinks = 0;
SmallPtrSet<BasicBlock *, 4> Blocks;
for (auto *CV : CurrentVisited) {
if (auto *I = dyn_cast<Instruction>(CV))
if (auto *Arg = dyn_cast<Argument>(CV))
if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
++NonFreeArgs;
+ if (!isa<Instruction>(CV) ||
+ !LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++NonLoopSources;
continue;
}
+ if (isa<PHINode>(CV))
+ continue;
+ if (LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++LoopSinks;
if (Sinks.count(cast<Instruction>(CV)))
continue;
++ToPromote;
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
- if (!isa<PHINode>(V) && (ToPromote < 2 || (Blocks.size() == 1 &&
- (NonFreeArgs > SafeWrap.size()))))
+ if (!isa<PHINode>(V) && !(LoopSinks && NonLoopSources) &&
+ (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size())))
return false;
IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
<< "register for ZExt type\n");
continue;
}
- MadeChange |= TryToPromote(Phi, PromoteWidth);
+ MadeChange |= TryToPromote(Phi, PromoteWidth, LI);
} else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) {
// Search up from icmps to try to promote their operands.
// Skip signed or pointer compares
for (auto &Op : ICmp->operands()) {
if (auto *OpI = dyn_cast<Instruction>(Op)) {
if (auto PromotedWidth = GetPromoteWidth(OpI)) {
- MadeChange |= TryToPromote(OpI, PromotedWidth);
+ MadeChange |= TryToPromote(OpI, PromotedWidth, LI);
break;
}
}
; CHECK-O2-NEXT: .LBB0_3:
; CHECK-O2-NEXT: mov w9, #1
; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split
-; CHECK-O2-NEXT: and w8, w8, #0xffff
; CHECK-O2-NEXT: cmp w8, w9
; CHECK-O2-NEXT: cset w0, eq
; CHECK-O2-NEXT: ret
; CHECK-O3-NEXT: cbz x1, .LBB0_4
; CHECK-O3-NEXT: // %bb.2:
; CHECK-O3-NEXT: mov w9, #2
-; CHECK-O3-NEXT: and w8, w8, #0xffff
; CHECK-O3-NEXT: cmp w8, w9
; CHECK-O3-NEXT: cset w0, eq
; CHECK-O3-NEXT: ret
; CHECK-O3-NEXT: .LBB0_3:
; CHECK-O3-NEXT: mov w9, #1
-; CHECK-O3-NEXT: and w8, w8, #0xffff
; CHECK-O3-NEXT: cmp w8, w9
; CHECK-O3-NEXT: cset w0, eq
; CHECK-O3-NEXT: ret
define i8 @loopcmp(ptr nocapture noundef readonly %x, i8 noundef %y) {
; CHECK-O2-LABEL: loopcmp:
; CHECK-O2: // %bb.0: // %entry
+; CHECK-O2-NEXT: and w9, w1, #0xff
; CHECK-O2-NEXT: .LBB1_1: // %while.cond
; CHECK-O2-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-O2-NEXT: ldrb w8, [x0], #1
-; CHECK-O2-NEXT: cmp w8, w1, uxtb
+; CHECK-O2-NEXT: cmp w8, w9
; CHECK-O2-NEXT: b.lo .LBB1_1
; CHECK-O2-NEXT: // %bb.2: // %while.end
; CHECK-O2-NEXT: mov w0, w8