From 215df9ed98057e05e8032fe9d4bd8e1185562a7c Mon Sep 17 00:00:00 2001
From: Sanjoy Das
Date: Tue, 4 Aug 2015 01:52:05 +0000
Subject: [PATCH] Revert "[LSR] Generate and use zero extends"

This reverts commit r243348 and r243357. They caused PR24347.

llvm-svn: 243939
---
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  | 160 +++------------------
 .../LoopStrengthReduce/X86/zext-of-scale.ll        |  70 ---------
 2 files changed, 21 insertions(+), 209 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopStrengthReduce/X86/zext-of-scale.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 059b10e..773777a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -256,22 +256,9 @@ struct Formula {
   /// live in an add immediate field rather than a register.
   int64_t UnfoldedOffset;
 
-  /// ZeroExtendScaledReg - This formula zero extends the scale register to
-  /// ZeroExtendType before its use.
-  bool ZeroExtendScaledReg;
-
-  /// ZeroExtendBaseReg - This formula zero extends all the base registers to
-  /// ZeroExtendType before their use.
-  bool ZeroExtendBaseReg;
-
-  /// ZeroExtendType - The destination type of the zero extension implied by
-  /// the above two booleans.
-  Type *ZeroExtendType;
-
   Formula()
       : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
-        ScaledReg(nullptr), UnfoldedOffset(0), ZeroExtendScaledReg(false),
-        ZeroExtendBaseReg(false), ZeroExtendType(nullptr) {}
+        ScaledReg(nullptr), UnfoldedOffset(0) {}
 
   void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
 
@@ -426,12 +413,10 @@ size_t Formula::getNumRegs() const {
 /// getType - Return the type of this formula, if it has one, or null
 /// otherwise. This type is meaningless except for the bit size.
 Type *Formula::getType() const {
-  return ZeroExtendType
-             ? ZeroExtendType
-             : !BaseRegs.empty()
-                   ? BaseRegs.front()->getType()
-                   : ScaledReg ? ScaledReg->getType()
-                               : BaseGV ? BaseGV->getType() : nullptr;
+  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+         ScaledReg ? ScaledReg->getType() :
+         BaseGV ? BaseGV->getType() :
+         nullptr;
 }
 
 /// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -472,10 +457,7 @@ void Formula::print(raw_ostream &OS) const {
   }
   for (const SCEV *BaseReg : BaseRegs) {
     if (!First) OS << " + "; else First = false;
-    if (ZeroExtendBaseReg)
-      OS << "reg(zext " << *BaseReg << " to " << *ZeroExtendType << ')';
-    else
-      OS << "reg(" << *BaseReg << ')';
+    OS << "reg(" << *BaseReg << ')';
   }
   if (HasBaseReg && BaseRegs.empty()) {
     if (!First) OS << " + "; else First = false;
@@ -487,12 +469,9 @@ void Formula::print(raw_ostream &OS) const {
   if (Scale != 0) {
     if (!First) OS << " + "; else First = false;
     OS << Scale << "*reg(";
-    if (ScaledReg) {
-      if (ZeroExtendScaledReg)
-        OS << "(zext " << *ScaledReg << " to " << *ZeroExtendType << ')';
-      else
-        OS << *ScaledReg;
-    } else
+    if (ScaledReg)
+      OS << *ScaledReg;
+    else
       OS << "<unknown>";
     OS << ')';
   }
@@ -1753,7 +1732,6 @@ class LSRInstance {
   void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
   void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
   void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
-  void GenerateZExts(LSRUse &LU, unsigned LUIdx, Formula Base);
   void GenerateCrossUseConstantOffsets();
   void GenerateAllReuseFormulae();
 
@@ -3649,64 +3627,6 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
   }
 }
 
-/// GenerateZExts - If a scale or a base register can be rewritten as
-/// "Zext({A,+,1})" then consider a formula of that form.
-void LSRInstance::GenerateZExts(LSRUse &LU, unsigned LUIdx, Formula Base) {
-  // Don't bother with symbolic values.
-  if (Base.BaseGV)
-    return;
-
-  auto CanBeNarrowed = [&](const SCEV *Reg) -> const SCEV * {
-    // Check if the register is an increment can be rewritten as zext(R) where
-    // the zext is free.
-
-    const auto *RegAR = dyn_cast_or_null<SCEVAddRecExpr>(Reg);
-    if (!RegAR)
-      return nullptr;
-
-    const auto *ZExtStart = dyn_cast<SCEVZeroExtendExpr>(RegAR->getStart());
-    const auto *ConstStep =
-        dyn_cast<SCEVConstant>(RegAR->getStepRecurrence(SE));
-    if (!ZExtStart || !ConstStep || ConstStep->getValue()->getValue() != 1)
-      return nullptr;
-
-    const SCEV *NarrowStart = ZExtStart->getOperand();
-    if (!TTI.isZExtFree(NarrowStart->getType(), ZExtStart->getType()))
-      return nullptr;
-
-    const auto *NarrowAR = dyn_cast<SCEVAddRecExpr>(
-        SE.getAddRecExpr(NarrowStart, SE.getConstant(NarrowStart->getType(), 1),
-                         RegAR->getLoop(), RegAR->getNoWrapFlags()));
-
-    if (!NarrowAR || !NarrowAR->getNoWrapFlags(SCEV::FlagNUW))
-      return nullptr;
-
-    return NarrowAR;
-  };
-
-  if (Base.ScaledReg && !Base.ZeroExtendType)
-    if (const SCEV *S = CanBeNarrowed(Base.ScaledReg)) {
-      Formula F = Base;
-      F.ZeroExtendType = Base.ScaledReg->getType();
-      F.ZeroExtendScaledReg = true;
-      F.ScaledReg = S;
-
-      if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
-        InsertFormula(LU, LUIdx, F);
-    }
-
-  if (Base.BaseRegs.size() == 1 && !Base.ZeroExtendType)
-    if (const SCEV *S = CanBeNarrowed(Base.BaseRegs[0])) {
-      Formula F = Base;
-      F.ZeroExtendType = Base.BaseRegs[0]->getType();
-      F.ZeroExtendBaseReg = true;
-      F.BaseRegs[0] = S;
-
-      if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
-        InsertFormula(LU, LUIdx, F);
    }
-}
-
 namespace {
 
 /// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
@@ -3926,8 +3846,6 @@ LSRInstance::GenerateAllReuseFormulae() {
     LSRUse &LU = Uses[LUIdx];
     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
-    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
-      GenerateZExts(LU, LUIdx, LU.Formulae[i]);
   }
 
   GenerateCrossUseConstantOffsets();
@@ -4565,27 +4483,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
 
     // If we're expanding for a post-inc user, make the post-inc adjustment.
     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
-    const SCEV *ExtendedReg =
-        F.ZeroExtendBaseReg ? SE.getZeroExtendExpr(Reg, F.ZeroExtendType) : Reg;
-
-    const SCEV *PostIncReg =
-        TransformForPostIncUse(Denormalize, ExtendedReg, LF.UserInst,
-                               LF.OperandValToReplace, Loops, SE, DT);
-    if (PostIncReg == ExtendedReg) {
-      Value *Expanded = Rewriter.expandCodeFor(Reg, nullptr, IP);
-      if (F.ZeroExtendBaseReg)
-        Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP);
-      Ops.push_back(SE.getUnknown(Expanded));
-    } else {
-      Ops.push_back(
-          SE.getUnknown(Rewriter.expandCodeFor(PostIncReg, nullptr, IP)));
-    }
-  }
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);
 
-  // Note on post-inc uses and zero extends -- since the no-wrap behavior for
-  // the post-inc SCEV can be different from the no-wrap behavior of the pre-inc
-  // SCEV, if a post-inc transform is required we do the zero extension on the
-  // pre-inc expression before doing the post-inc transform.
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
+  }
 
   // Expand the ScaledReg portion.
   Value *ICmpScaledV = nullptr;
@@ -4594,33 +4497,22 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
 
     // If we're expanding for a post-inc user, make the post-inc adjustment.
     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
-    const SCEV *ExtendedScaleS =
-        F.ZeroExtendScaledReg ? SE.getZeroExtendExpr(ScaledS, F.ZeroExtendType)
-                              : ScaledS;
-    const SCEV *PostIncScaleS =
-        TransformForPostIncUse(Denormalize, ExtendedScaleS, LF.UserInst,
-                               LF.OperandValToReplace, Loops, SE, DT);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
       // Expand ScaleReg as if it was part of the base regs.
-      Value *Expanded = nullptr;
-      if (PostIncScaleS == ExtendedScaleS) {
-        Expanded = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
-        if (F.ZeroExtendScaledReg)
-          Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP);
-      } else {
-        Expanded = Rewriter.expandCodeFor(PostIncScaleS, nullptr, IP);
-      }
-
       if (F.Scale == 1)
-        Ops.push_back(SE.getUnknown(Expanded));
+        Ops.push_back(
+            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
       else {
         // An interesting way of "folding" with an icmp is to use a negated
        // scale, which we'll implement by inserting it into the other operand
         // of the icmp.
         assert(F.Scale == -1 &&
                "The only scale supported by ICmpZero uses is -1!");
-        ICmpScaledV = Expanded;
+        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
       }
     } else {
       // Otherwise just expand the scaled register and an explicit scale,
@@ -4634,17 +4526,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
         Ops.clear();
         Ops.push_back(SE.getUnknown(FullV));
       }
-
-      Value *Expanded = nullptr;
-      if (PostIncScaleS == ExtendedScaleS) {
-        Expanded = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
-        if (F.ZeroExtendScaledReg)
-          Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP);
-      } else {
-        Expanded = Rewriter.expandCodeFor(PostIncScaleS, nullptr, IP);
-      }
-
-      ScaledS = SE.getUnknown(Expanded);
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
       if (F.Scale != 1)
         ScaledS = SE.getMulExpr(ScaledS,
                                 SE.getConstant(ScaledS->getType(), F.Scale));
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/zext-of-scale.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/zext-of-scale.ll
deleted file mode 100644
index d0972fe..0000000
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/zext-of-scale.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt < %s -S -loop-reduce | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct = type { [8 x i8] }
-
-declare void @use_32(i32)
-declare void @use_64(i64)
-
-define void @f(i32 %tmp156, i32* %length_buf_1, i32* %length_buf_0, %struct* %b,
-               %struct* %c, %struct* %d, %struct* %e, i32* %length_buf_2,
-               i32 %tmp160) {
-; CHECK-LABEL: @f(
-entry:
-  %begin151 = getelementptr inbounds %struct, %struct* %b, i64 0, i32 0, i64 12
-  %tmp21 = bitcast i8* %begin151 to i32*
-  %begin157 = getelementptr inbounds %struct, %struct* %c, i64 0, i32 0, i64 16
-  %tmp23 = bitcast i8* %begin157 to double*
-  %begin163 = getelementptr inbounds %struct, %struct* %d, i64 0, i32 0, i64 16
-  %tmp25 = bitcast i8* %begin163 to double*
-  %length.i820 = load i32, i32* %length_buf_1, align 4, !range !0
-  %enter = icmp ne i32 %tmp156, -1
-  br i1 %enter, label %ok_146, label %block_81_2
-
-ok_146:
-  %var_13 = phi double [ %tmp186, %ok_161 ], [ 0.000000e+00, %entry ]
-  %var_17 = phi i32 [ %tmp187, %ok_161 ], [ %tmp156, %entry ]
-  %tmp174 = zext i32 %var_17 to i64
-  %tmp175 = icmp ult i32 %var_17, %length.i820
-  br i1 %tmp175, label %ok_152, label %block_81_2
-
-ok_152:
-  %tmp176 = getelementptr inbounds i32, i32* %tmp21, i64 %tmp174
-  %tmp177 = load i32, i32* %tmp176, align 4
-  %tmp178 = zext i32 %tmp177 to i64
-  %length.i836 = load i32, i32* %length_buf_2, align 4, !range !0
-  %tmp179 = icmp ult i32 %tmp177, %length.i836
-  br i1 %tmp179, label %ok_158, label %block_81_2
-
-ok_158:
-  %tmp180 = getelementptr inbounds double, double* %tmp23, i64 %tmp178
-  %tmp181 = load double, double* %tmp180, align 8
-  %length.i = load i32, i32* %length_buf_0, align 4, !range !0
-  %tmp182 = icmp slt i32 %var_17, %length.i
-  br i1 %tmp182, label %ok_161, label %block_81_2
-
-ok_161:
-; CHECK-LABEL: ok_161:
-; CHECK: add
-; CHECK-NOT: add
-  %tmp183 = getelementptr inbounds double, double* %tmp25, i64 %tmp174
-  %tmp184 = load double, double* %tmp183, align 8
-  %tmp185 = fmul double %tmp181, %tmp184
-  %tmp186 = fadd double %var_13, %tmp185
-  %tmp187 = add nsw i32 %var_17, 1
-  %tmp188 = icmp slt i32 %tmp187, %tmp160
-; CHECK: br
-  br i1 %tmp188, label %ok_146, label %block_81
-
-block_81:
-  call void @use_64(i64 %tmp174) ;; pre-inc use
-  call void @use_32(i32 %tmp187) ;; post-inc use
-  ret void
-
-block_81_2:
-  ret void
-}
-
-!0 = !{i32 0, i32 2147483647}
-- 
2.7.4