break;
}
case Instruction::Shl: {
- // If we are truncating the result of this SHL, and if it's a shift of a
- // constant amount, we can always perform a SHL in a smaller type.
- const APInt *Amt;
- if (match(I->getOperand(1), m_APInt(Amt))) {
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (Amt->getLimitedValue(BitWidth) < BitWidth)
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
- }
+ // If we are truncating the result of this SHL, and if it's a shift of an
+ // in-range amount, we can always perform a SHL in a smaller type.
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ KnownBits AmtKnownBits =
+ llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+ if (AmtKnownBits.getMaxValue().ult(BitWidth))
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
break;
}
case Instruction::LShr: {
// If this is a truncate of a logical shr, we can truncate it to a smaller
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
- const APInt *Amt;
- if (match(I->getOperand(1), m_APInt(Amt))) {
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (Amt->getLimitedValue(BitWidth) < BitWidth &&
- IC.MaskedValueIsZero(I->getOperand(0),
- APInt::getBitsSetFrom(OrigBitWidth, BitWidth), 0, CxtI)) {
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
- }
+ // TODO: It is enough to check that the bits we would be shifting in are
+ // zero - use AmtKnownBits.getMaxValue().
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ KnownBits AmtKnownBits =
+ llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+ if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
+ IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
}
break;
}
// original type and the sign bit of the truncate type are similar.
// TODO: It is enough to check that the bits we would be shifting in are
// similar to sign bit of the truncate type.
- const APInt *Amt;
- if (match(I->getOperand(1), m_APInt(Amt))) {
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (Amt->getLimitedValue(BitWidth) < BitWidth &&
- OrigBitWidth - BitWidth <
- IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
- }
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ KnownBits AmtKnownBits =
+ llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+ unsigned ShiftedBits = OrigBitWidth - BitWidth;
+ if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
+ ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
break;
}
case Instruction::Trunc:
define <2 x i64> @test8_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test8_vec_nonuniform(
-; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
-; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128>
-; CHECK-NEXT: [[E:%.*]] = shl <2 x i128> [[D]], <i128 32, i128 48>
-; CHECK-NEXT: [[F:%.*]] = or <2 x i128> [[E]], [[C]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[E:%.*]] = shl <2 x i64> [[D]], <i64 32, i64 48>
+; CHECK-NEXT: [[F:%.*]] = or <2 x i64> [[E]], [[C]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = zext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define i64 @test11(i32 %A, i32 %B) {
; CHECK-LABEL: @test11(
-; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i128
+; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT: [[F:%.*]] = shl i128 [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT: ret i64 [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[F:%.*]] = shl i64 [[C]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
;
%C = zext i32 %A to i128
%D = zext i32 %B to i128
define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test11_vec(
-; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = zext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test11_vec_nonuniform(
-; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = shl <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = zext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define i64 @test12(i32 %A, i32 %B) {
; CHECK-LABEL: @test12(
-; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i128
+; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT: [[F:%.*]] = lshr i128 [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT: ret i64 [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[F:%.*]] = lshr i64 [[C]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
;
%C = zext i32 %A to i128
%D = zext i32 %B to i128
define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test12_vec(
-; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = zext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define <2 x i64> @test12_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test12_vec_nonuniform(
-; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = zext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define i64 @test13(i32 %A, i32 %B) {
; CHECK-LABEL: @test13(
-; CHECK-NEXT: [[C:%.*]] = sext i32 [[A:%.*]] to i128
+; CHECK-NEXT: [[C:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT: [[F:%.*]] = ashr i128 [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT: ret i64 [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[F:%.*]] = ashr i64 [[C]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
;
%C = sext i32 %A to i128
%D = zext i32 %B to i128
define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test13_vec(
-; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = sext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>
define <2 x i64> @test13_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: @test13_vec_nonuniform(
-; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> [[G]]
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT: ret <2 x i64> [[F]]
;
%C = sext <2 x i32> %A to <2 x i128>
%D = zext <2 x i32> %B to <2 x i128>