TTI, Processed);
}
+ if (S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr) {
+ const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S);
+
+ unsigned Opcode;
+ switch (S->getSCEVType()) {
+ case scAddExpr:
+ Opcode = Instruction::Add;
+ break;
+ case scMulExpr:
+ Opcode = Instruction::Mul;
+ break;
+ default:
+ llvm_unreachable("There are no other variants here.");
+ }
+
+ Type *OpType = NAry->getType();
+ int PairCost = TTI.getOperationCost(Opcode, OpType);
+ // TODO: this cost model is very pessimistic for Mul, because the
+ // expander actually uses a binary powering (Bin Pow) algorithm;
+ // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+
+ assert(NAry->getNumOperands() > 1 &&
+ "Nary expr should have more than 1 operand.");
+ for (const SCEV *Op : NAry->operands()) {
+ if (isHighCostExpansionHelper(Op, L, At, BudgetRemaining, TTI, Processed))
+ return true;
+ if (Op == *NAry->op_begin())
+ continue;
+ BudgetRemaining -= PairCost;
+ }
+
+ return BudgetRemaining < 0;
+ }
+
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
// the exit condition.
if (isa<SCEVMinMaxExpr>(S))
; CHECK-NEXT: store i8 0, i8* [[ADR2]]
; CHECK-NEXT: [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, i8* [[ADR3]]
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
+; CHECK-NEXT: [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
; CHECK: innerexit:
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11
; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.body.lr.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12
-; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 12
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
; CHECK: while.cond.while.end_crit_edge:
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
-; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]])
; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]])