From 6d2df181638a34f5d4ebc0c92cfb6a30abf8588d Mon Sep 17 00:00:00 2001
From: Qiu Chaofan
Date: Wed, 12 May 2021 13:18:20 +0800
Subject: [PATCH] [VectorCombine] Restrict single-element-store index to
 inbounds constant

The vector single-element update optimization landed in 2db4979, but its
scope needs restriction. This patch restricts the index to an inbounds
constant and requires the vector type to be fixed-size. In the future, we
may use value tracking to relax the constant restriction.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D102146
---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp    | 13 ++++---
 .../Transforms/VectorCombine/load-insert-store.ll  | 42 +++++++++++++++++++---
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index cee7880..c254f61 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -781,24 +781,29 @@ static bool isMemModifiedBetween(BasicBlock::iterator Begin,
 //    store i32 %b, i32* %1
 bool VectorCombine::foldSingleElementStore(Instruction &I) {
   StoreInst *SI = dyn_cast<StoreInst>(&I);
-  if (!SI || !SI->isSimple() || !SI->getValueOperand()->getType()->isVectorTy())
+  if (!SI || !SI->isSimple() ||
+      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
     return false;
 
   // TODO: Combine more complicated patterns (multiple insert) by referencing
   // TargetTransformInfo.
   Instruction *Source;
-  Value *NewElement, *Idx;
+  Value *NewElement;
+  ConstantInt *Idx;
   if (!match(SI->getValueOperand(),
              m_InsertElt(m_Instruction(Source), m_Value(NewElement),
-                         m_Value(Idx))))
+                         m_ConstantInt(Idx))))
     return false;
 
   if (auto *Load = dyn_cast<LoadInst>(Source)) {
+    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
     const DataLayout &DL = I.getModule()->getDataLayout();
     Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
-    // Don't optimize for atomic/volatile load or stores.
+    // Don't optimize for atomic/volatile load or store. Ensure memory is not
+    // modified between, vector type matches store size, and index is inbounds.
     if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
         !DL.typeSizeEqualsStoreSize(Load->getType()) ||
+        Idx->uge(VecTy->getNumElements()) ||
         SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
         isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                              MemoryLocation::get(SI), AA))
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 3a6a7aa..71feaa7 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -30,6 +30,37 @@ entry:
   ret void
 }
 
+; To verify case when index is out of bounds
+define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_outofbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
+; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <8 x i16>, <8 x i16>* %q
+  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
+  store <8 x i16> %vecins, <8 x i16>* %q
+  ret void
+}
+
+define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_vscale(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
+  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
+  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
+  ret void
+}
+
 define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
 ; CHECK-LABEL: @insert_store_v9i4(
 ; CHECK-NEXT:  entry:
@@ -82,8 +113,9 @@ cont:
 define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
 ; CHECK-LABEL: @insert_store_nonconst(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX:%.*]]
-; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -93,17 +125,17 @@ entry:
   ret void
 }
 
-define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
 ; CHECK-LABEL: @insert_store_ptr_strip(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
 ; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %0 = load <16 x i8>, <16 x i8>* %q
-  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
+  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
   %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
   %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
-- 
2.7.4
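
For reference, a minimal IR sketch of the fold that foldSingleElementStore performs
when the guards above pass. This example is not taken from the patch; the function
name is illustrative, and it assumes the pass is run as opt -vector-combine -S. With
a constant, in-bounds index the load/insertelement/store sequence is narrowed to a
single scalar store; with an out-of-bounds, non-constant, or scalable-vector index,
the pattern is now left untouched, as the new tests check.

  define void @narrow_single_element_store(<16 x i8>* %q, i8 zeroext %s) {
  entry:
    %v = load <16 x i8>, <16 x i8>* %q
    %ins = insertelement <16 x i8> %v, i8 %s, i32 3   ; constant index, 3 < 16
    store <16 x i8> %ins, <16 x i8>* %q
    ret void
  }

  ; Expected result of the fold (matching the CHECK lines in the tests above):
  ;   %0 = getelementptr inbounds <16 x i8>, <16 x i8>* %q, i32 0, i32 3
  ;   store i8 %s, i8* %0, align 1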