#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
+#define DEBUG_TYPE "vector-combine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
const DominatorTree &DT;
AAResults &AA;
AssumptionCache ∾
+ InstructionWorklist Worklist;
bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
+
+ void replaceValue(Value &Old, Value &New) {
+ Old.replaceAllUsesWith(&New);
+ New.takeName(&Old);
+ if (auto *NewI = dyn_cast<Instruction>(&New)) {
+ Worklist.pushUsersToWorkList(*NewI);
+ Worklist.pushValue(NewI);
+ }
+ Worklist.pushValue(&Old);
+ }
+
+ void eraseInstruction(Instruction &I) {
+ for (Value *Op : I.operands())
+ Worklist.pushValue(Op);
+ Worklist.remove(&I);
+ I.eraseFromParent();
+ }
};
} // namespace
-static void replaceValue(Value &Old, Value &New) {
- Old.replaceAllUsesWith(&New);
- New.takeName(&Old);
-}
-
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
else
foldExtExtBinop(Ext0, Ext1, I);
+ Worklist.push(Ext0);
+ Worklist.push(Ext1);
return true;
}
DL);
NSI->setAlignment(ScalarOpAlignment);
replaceValue(I, *NSI);
- // Need erasing the store manually.
- I.eraseFromParent();
+ eraseInstruction(I);
return true;
}
/// Try to scalarize vector loads feeding extractelement instructions.
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
Value *Ptr;
- Value *Idx;
- if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
+ if (!match(&I, m_Load(m_Value(Ptr))))
return false;
- auto *LI = cast<LoadInst>(I.getOperand(0));
+ auto *LI = cast<LoadInst>(&I);
const DataLayout &DL = I.getModule()->getDataLayout();
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
return false;
return false;
bool MadeChange = false;
+ auto FoldInst = [this, &MadeChange](Instruction &I) {
+ Builder.SetInsertPoint(&I);
+ MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= foldExtractExtract(I);
+ MadeChange |= foldBitcastShuf(I);
+ MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= foldExtractedCmps(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= foldSingleElementStore(I);
+ };
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
for (Instruction &I : make_early_inc_range(BB)) {
if (isa<DbgInfoIntrinsic>(I))
continue;
- Builder.SetInsertPoint(&I);
- MadeChange |= vectorizeLoadInsert(I);
- MadeChange |= foldExtractExtract(I);
- MadeChange |= foldBitcastShuf(I);
- MadeChange |= scalarizeBinopOrCmp(I);
- MadeChange |= foldExtractedCmps(I);
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= foldSingleElementStore(I);
+ FoldInst(I);
}
}
- // We're done with transforms, so remove dead instructions.
- if (MadeChange)
- for (BasicBlock &BB : F)
- SimplifyInstructionsInBlock(&BB);
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.removeOne();
+ if (!I)
+ continue;
+
+ if (isInstructionTriviallyDead(I)) {
+ eraseInstruction(*I);
+ continue;
+ }
+
+ FoldInst(*I);
+ }
return MadeChange;
}
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 225
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
-; CHECK-NEXT: [[TMP8:%.*]] = load <225 x double>, <225 x double>* [[TMP7]], align 8
-; CHECK-NEXT: [[MATRIXEXT4:%.*]] = extractelement <225 x double> [[TMP8]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP5]]
+; CHECK-NEXT: [[MATRIXEXT4:%.*]] = load double, double* [[TMP8]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]]
-; CHECK-NEXT: [[MATRIXEXT7:%.*]] = extractelement <225 x double> [[TMP8]], i64 [[TMP1]]
-; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: store double [[SUB]], double* [[TMP9]], align 8
+; CHECK-NEXT: [[MATRIXEXT7:%.*]] = load double, double* [[TMP9]], align 8
+; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: store double [[SUB]], double* [[TMP10]], align 8
; CHECK-NEXT: ret void
;
entry:
define void @load_extract_insert_store_const_idx(<225 x double>* %A) {
; CHECK-LABEL: @load_extract_insert_store_const_idx(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 0
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 1
+; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
-; CHECK-NEXT: store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
+; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_1]])
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i64 [[IDX_2:%.*]], 225
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]])
-; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]]
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]]
+; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
-; CHECK-NEXT: store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
+; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]])
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]]
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]]
+; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
-; CHECK-NEXT: store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
+; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8
; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]]
; CHECK: exit:
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @PR34724(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @PR34724(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
+; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
+; CHECK-NEXT: [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT: ret void
;