OpenGL issues should be fixed by now.
llvm-svn: 320568
Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
ArrayRef<unsigned> Idxs);
+/// \brief Attempt to constant fold an insertelement instruction with the
+/// specified operands and indices. The constant result is returned if
+/// successful; if not, null is returned.
+Constant *ConstantFoldInsertElementInstruction(Constant *Val,
+ Constant *Elt,
+ Constant *Idx);
+
/// \brief Attempt to constant fold an extractelement instruction with the
/// specified operands and indices. The constant result is returned if
/// successful; if not, null is returned.
Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q);
+/// Given operands for an InsertElement, fold the result or return null.
+Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx,
+ const SimplifyQuery &Q);
+
/// Given operands for an ExtractValueInst, fold the result or return null.
Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q);
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
}
+Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
+ const SimplifyQuery &Q) {
+ // Try to constant fold; this is attempted only when all three operands are constants.
+ auto *VecC = dyn_cast<Constant>(Vec);
+ auto *ValC = dyn_cast<Constant>(Val);
+ auto *IdxC = dyn_cast<Constant>(Idx);
+ if (VecC && ValC && IdxC)
+ return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC);
+
+ // Fold into undef if a constant integer index is >= the vector's element count.
+ if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements();
+
+ if (CI->uge(NumElements))
+ return UndefValue::get(Vec->getType());
+ }
+
+ // TODO: We should also fold if the index is itself an undef.
+
+ return nullptr;
+}
+
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
IV->getIndices(), Q);
break;
}
+ case Instruction::InsertElement: {
+ auto *IE = cast<InsertElementInst>(I);
+ Result = SimplifyInsertElementInst(IE->getOperand(0), IE->getOperand(1),
+ IE->getOperand(2), Q);
+ break;
+ }
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
+ if (auto *V = SimplifyInsertElementInst(
+ VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
+ return replaceInstUsesWith(IE, V);
+
// Inserting an undef or into an undefined place, remove this.
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
replaceInstUsesWith(IE, VecOp);
}
declare void @llvm.assume(i1)
+
+define <4 x double> @inselt_bad_index(<4 x double> %a) {
+; CHECK-LABEL: @inselt_bad_index(
+; CHECK-NEXT: ret <4 x double> undef
+;
+ %I = insertelement <4 x double> %a, double 0.0, i64 4294967296
+ ret <4 x double> %I
+}
--- /dev/null
+; RUN: opt -S -instcombine < %s | FileCheck %s
+%S = type { i16, i32 }
+
+define <2 x i16> @test1() {
+entry:
+ %b = insertelement <2 x i16> <i16 undef, i16 0>, i16 extractvalue (%S select (i1 icmp eq (i16 extractelement (<2 x i16> bitcast (<1 x i32> <i32 1> to <2 x i16>), i32 0), i16 0), %S zeroinitializer, %S { i16 0, i32 1 }), 0), i32 0
+ ret <2 x i16> %b
+}
+
+; CHECK-LABEL: @test1(
+; CHECK: ret <2 x i16> zeroinitializer
ret <4 x float> %ins6
}
+; Out of bounds index folds to undef
define <4 x float> @bazzz(<4 x float> %x) {
; CHECK-LABEL: @bazzz(
-; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
-; CHECK-NEXT: ret <4 x float> [[INS2]]
+; CHECK-NEXT: ret <4 x float> <float undef, float undef, float 2.000000e+00, float undef>
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 5
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
--- /dev/null
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %A) {
+ %I = insertelement <4 x i32> %A, i32 5, i64 4294967296
+ ; CHECK: ret <4 x i32> undef
+ ret <4 x i32> %I
+}
+
+define <4 x i32> @test2(<4 x i32> %A) {
+ %I = insertelement <4 x i32> %A, i32 5, i64 4
+ ; CHECK: ret <4 x i32> undef
+ ret <4 x i32> %I
+}
+
+define <4 x i32> @test3(<4 x i32> %A) {
+ %I = insertelement <4 x i32> %A, i32 5, i64 1
+ ; CHECK: ret <4 x i32> %I
+ ret <4 x i32> %I
+}
+
+define <4 x i32> @test4(<4 x i32> %A) {
+ %I = insertelement <4 x i32> %A, i32 5, i128 100
+ ; CHECK: ret <4 x i32> undef
+ ret <4 x i32> %I
+}
; RUN: opt -S -instsimplify < %s | FileCheck %s
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
%S = type { i16, i32 }
define <2 x i16> @test1() {
ret <2 x i16> %b
}
+; InstCombine will be able to fold this into zeroinitializer
; CHECK-LABEL: @test1(
-; CHECK: ret <2 x i16> zeroinitializer
+; CHECK: ret <2 x i16> <i16 extractvalue (%S select (i1 icmp eq (i16 extractelement (<2 x i16> bitcast (<1 x i32> <i32 1> to <2 x i16>), i32 0), i16 0), %S zeroinitializer, %S { i16 0, i32 1 }), 0), i16 0>