From fda8b4714e05c68deee469970cb6f7f7ea8b32b7 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 17 Jun 2021 09:53:33 +0100 Subject: [PATCH] [InterleaveAccess] Copy fast math flags when adjusting binary operators in interleave access pass The Interleave Access pass will convert shuffle(binop(load, load)) to binop(shuffle(load), shuffle(load)), in order to create more interleaving load patterns (VLD2/3/4) that might have been messed up by instcombine. As shown in D104247 we were missing copying IR flags to the new instruction though, which should just be kept the same as the original instruction. Differential Revision: https://reviews.llvm.org/D104255 --- llvm/include/llvm/IR/InstrTypes.h | 10 ++++----- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 4 ++-- .../AArch64/binopshuffles-inseltpoison.ll | 24 +++++++++++----------- .../InterleavedAccess/AArch64/binopshuffles.ll | 24 +++++++++++----------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 7fc4602..0070c87 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -248,11 +248,11 @@ public: } #include "llvm/IR/Instruction.def" - static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc, - Value *V1, Value *V2, - Instruction *CopyO, - const Twine &Name = "") { - BinaryOperator *BO = Create(Opc, V1, V2, Name); + static BinaryOperator * + CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, + const Twine &Name = "", + Instruction *InsertBefore = nullptr) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore); BO->copyIRFlags(CopyO); return BO; } diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1007874..24a57cc 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -408,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles( auto *NewSVI2 = new ShuffleVectorInst( BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, SVI->getName(), SVI); - Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2, - BI->getName(), SVI); + BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags( + BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI); SVI->replaceAllUsesWith(NewBI); LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI << "\n With : " << *NewSVI1 << "\n And : " diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll index 47327b8..f29fffc 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll @@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) { ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; @@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) { ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] -; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; @@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) { ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] -; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; @@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll index 4711409..5befe69 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll @@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) { ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; @@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) { ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] -; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; @@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) { ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] -; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; @@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] -- 2.7.4