return true;
}
+/// Try to replace the scalar `urem` \p Instr with a subtract/compare/select
+/// sequence, using \p LVI to obtain the ranges of both operands at \p Instr.
+///
+/// Rationale: R = X u% Y can be viewed as a loop/self-recursion that keeps
+/// subtracting Y from X until the running value is u< Y:
+///   urem_rec(X, Y):
+///     Z = X - Y
+///     if X u< Y
+///       ret X
+///     else
+///       ret urem_rec(Z, Y)
+/// That is no improvement in general, but when one iteration is always
+/// enough it collapses to something cheap:
+///   R = X u< Y ? X : X - Y    iff X u< 2*Y (w/ unsigned saturation)
+/// Whole multiples of Y contained in X do not affect the answer, so one could
+/// also strip them first
+///   X* = X - (Y * |_ X / Y _|)
+///   R  = X* % Y               iff X* u< 2*Y (w/ unsigned saturation)
+/// i.e. only *which* iteration returns has to be known, not that it is the
+/// first one; that variant does not seem profitable here and is not done.
+///
+/// \returns true if \p Instr was replaced and erased, false if it was left
+/// untouched.
+static bool expandURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
+  assert(Instr->getOpcode() == Instruction::URem);
+  assert(!Instr->getType()->isVectorTy());
+
+  Value *Dividend = Instr->getOperand(0);
+  Value *Divisor = Instr->getOperand(1);
+
+  ConstantRange DividendCR = LVI->getConstantRange(Dividend, Instr);
+  ConstantRange DivisorCR = LVI->getConstantRange(Divisor, Instr);
+
+  // Is the dividend known to stay below twice the divisor (saturating)?
+  const APInt Two(DivisorCR.getBitWidth(), 2);
+  const bool DividendIsBound =
+      DividendCR.icmp(ICmpInst::ICMP_ULT, DivisorCR.umul_sat(Two));
+
+  // Even with an unknown dividend range, the divisor may be so large that the
+  // dividend can never be 2x larger than it, namely when the divisor is
+  // always negative. If neither condition holds, there is nothing to do.
+  const bool DivisorAlwaysNegative = DivisorCR.isAllNegative();
+  if (!(DividendIsBound || DivisorAlwaysNegative))
+    return false;
+
+  IRBuilder<> Builder{Instr};
+
+  // The expansion adds uses of the dividend. It is frozen only when the
+  // expansion is justified purely by the divisor being a negative constant
+  // (dividend range unknown), since in that case the original instruction
+  // had no extra uses of it.
+  // NOTE(review): the dividend also gains uses when its range is bounded;
+  // confirm that no freeze is required on that path.
+  const auto *ConstDivisor = dyn_cast<ConstantInt>(Divisor);
+  const bool JustifiedOnlyByNegativeConstant =
+      !DividendIsBound && ConstDivisor && ConstDivisor->isNegative();
+  if (JustifiedOnlyByNegativeConstant)
+    Dividend = Builder.CreateFreeze(Dividend, Dividend->getName() + ".frozen");
+
+  // Emit, in this order: X - Y (nuw), X u< Y, and the select between X and
+  // the difference.
+  Value *Reduced =
+      Builder.CreateNUWSub(Dividend, Divisor, Instr->getName() + ".urem");
+  Value *AlreadyReduced =
+      Builder.CreateICmpULT(Dividend, Divisor, Instr->getName() + ".cmp");
+  Value *Replacement = Builder.CreateSelect(AlreadyReduced, Dividend, Reduced);
+
+  // The select inherits the urem's name and users; the urem is then removed.
+  Replacement->takeName(Instr);
+  Instr->replaceAllUsesWith(Replacement);
+  Instr->eraseFromParent();
+  ++NumURemExpanded;
+  return true;
+}
+
+
static bool processURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
assert(Instr->getOpcode() == Instruction::URem);
assert(!Instr->getType()->isVectorTy());
return true;
}
+ if (expandURem(Instr, LVI))
+ return true;
+
return false;
}
; CHECK-LABEL: @constant.divisor.v4(
; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x.upper = icmp ult i8 %x, 4
define i8 @constant.divisor.x.range.v4(ptr %x.ptr) {
; CHECK-LABEL: @constant.divisor.x.range.v4(
; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG0:![0-9]+]]
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%x = load i8, ptr %x.ptr, !range !{ i8 0, i8 4 }
; CHECK-LABEL: @constant.divisor.v5(
; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 5
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x.upper = icmp ult i8 %x, 5
; CHECK-LABEL: @constant.divisor.v6(
; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 6
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 3
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x.upper = icmp ult i8 %x, 6
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_LOWER]])
; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x = icmp ult i8 %x, 4
; CHECK-LABEL: @variable.v4.range(
; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG0]]
; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[Y_PTR:%.*]], align 1, !range [[RNG1:![0-9]+]]
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%x = load i8, ptr %x.ptr, !range !{ i8 0, i8 4 }
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_LOWER]])
; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x = icmp ult i8 %x, 5
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_LOWER]])
; CHECK-NEXT: [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_Y_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x = icmp ult i8 %x, 6
; CHECK-LABEL: @large.divisor.v1(
; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], -128
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 127
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 127
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 127
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x.upper = icmp ult i8 %x, 128
define i8 @large.divisor.v1.range(ptr %x.ptr) {
; CHECK-LABEL: @large.divisor.v1.range(
; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG2:![0-9]+]]
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], 127
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], 127
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], 127
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%x = load i8, ptr %x.ptr, !range !{ i8 0, i8 128 }
; CHECK-LABEL: @large.divisor.with.overflow.v1(
; CHECK-NEXT: [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], -127
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_X_UPPER]])
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], -128
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], -128
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], -128
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%cmp.x.upper = icmp ult i8 %x, 129
define i8 @large.divisor.with.overflow.v1.range(ptr %x.ptr) {
; CHECK-LABEL: @large.divisor.with.overflow.v1.range(
; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[X_PTR:%.*]], align 1, !range [[RNG3:![0-9]+]]
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], -128
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X]], -128
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X]], -128
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%x = load i8, ptr %x.ptr, !range !{ i8 0, i8 129 }
}
define i8 @large.divisor.with.overflow.v2.unbound.x(i8 %x) {
; CHECK-LABEL: @large.divisor.with.overflow.v2.unbound.x(
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X:%.*]], -128
+; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i8 [[X:%.*]]
+; CHECK-NEXT: [[REM_UREM:%.*]] = sub nuw i8 [[X_FROZEN]], -128
+; CHECK-NEXT: [[REM_CMP:%.*]] = icmp ult i8 [[X_FROZEN]], -128
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X_FROZEN]], i8 [[REM_UREM]]
; CHECK-NEXT: ret i8 [[REM]]
;
%rem = urem i8 %x, 128