AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr);
+ /// Given operands for a BitCastInst, fold the result or return null.
+ Value *SimplifyBitCastInst(Value *Op, Type *Ty, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr);
+
//=== Helper functions for higher up the class hierarchy.
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+static Value *SimplifyBitCastInst(Value *, Type *, const Query &, unsigned);
/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
RecursionLimit);
}
+/// Try to simplify a bitcast of \p Op to type \p Ty without creating any new
+/// instruction. Returns the simplified value, or null when none of the folds
+/// below applies. The trailing unnamed parameter is not used.
+static Value *SimplifyBitCastInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+  // Constant operands are handed straight to the constant folder.
+  if (auto *ConstOp = dyn_cast<Constant>(Op))
+    return ConstantFoldCastOperand(Instruction::BitCast, ConstOp, Ty, Q.DL);
+
+  // A cast to the operand's own type is a no-op: bitcast x -> x
+  if (Ty == Op->getType())
+    return Op;
+
+  // Everything that remains requires the operand to be a bitcast itself.
+  auto *Inner = dyn_cast<BitCastInst>(Op);
+  if (!Inner)
+    return nullptr;
+
+  // A round trip back to the source type cancels: bitcast(bitcast x) -> x
+  Value *Src = Inner->getOperand(0);
+  return Src->getType() == Ty ? Src : nullptr;
+}
+
+/// Public entry point: bundles the analysis arguments into a Query and
+/// forwards to the file-local helper with the RecursionLimit budget.
+Value *llvm::SimplifyBitCastInst(Value *Op, Type *Ty, const DataLayout &DL,
+                                 const TargetLibraryInfo *TLI,
+                                 const DominatorTree *DT, AssumptionCache *AC,
+                                 const Instruction *CxtI) {
+  const Query Q(DL, TLI, DT, AC, CxtI);
+  return ::SimplifyBitCastInst(Op, Ty, Q, RecursionLimit);
+}
+
//=== Helper functions for higher up the class hierarchy.
/// Given operands for a BinaryOperator, see if we can fold the result.
Result =
SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
break;
+ case Instruction::BitCast:
+ Result =
+ SimplifyBitCastInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
+ break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
; Check that the first @llvm.aarch64.neon.st2 is optimized away by Early CSE.
; CHECK-LABEL: @test_cse2
; CHECK-NOT: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> %3, <4 x i32> %3, i8* %0)
-; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> %3, <4 x i32> %4, i8* %0)
+; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> %s.coerce.fca.0.extract, <4 x i32> %s.coerce.fca.1.extract, i8* %0)
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
--- /dev/null
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+; A trunc back to i1 of a zext from i1 is expected to fold to the original %V.
+define i1 @test1(i1 %V) {
+entry:
+ %Z = zext i1 %V to i32
+ %T = trunc i32 %Z to i1
+ ret i1 %T
+; CHECK-LABEL: define i1 @test1(
+; CHECK: ret i1 %V
+}
+
+; A bitcast round trip (i8* -> i32* -> i8*) is expected to fold to the original %V.
+define i8* @test2(i8* %V) {
+entry:
+ %BC1 = bitcast i8* %V to i32*
+ %BC2 = bitcast i32* %BC1 to i8*
+ ret i8* %BC2
+; CHECK-LABEL: define i8* @test2(
+; CHECK: ret i8* %V
+}
+
+; A bitcast to the operand's own type is expected to fold to the operand itself.
+define i8* @test3(i8* %V) {
+entry:
+ %BC = bitcast i8* %V to i8*
+ ret i8* %BC
+; CHECK-LABEL: define i8* @test3(
+; CHECK: ret i8* %V
+}