This is the third attempt to recommit r292526.
The original summary:
Currently, a GEP is considered free only if its indices are all constant.
TTI::getGEPCost() can provide a more accurate, target-specific analysis. TTI is
already used for the cost of many other instructions.
llvm-svn: 292633
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool isGEPFree(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
return true;
}
+/// \brief Use TTI to check whether a GEP is free, i.e. whether TTI.getGEPCost() reports TargetTransformInfo::TCC_Free for it.
+///
+/// Respects any simplified values known during the analysis of this callsite: an index with an entry in SimplifiedValues is queried as that constant.
+bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
+ SmallVector<Value *, 4> Indices;
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) // a simplified constant is recorded for this index
+ Indices.push_back(SimpleOp);
+ else
+ Indices.push_back(*I); // no simplified value recorded: pass the original index operand through
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
+ Indices);
+}
+
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Check whether inlining will turn a dynamic alloca into a static
// alloca and handle that case.
// Non-constant GEPs aren't folded, and disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
}
// Add the result as a new mapping to Base + Offset.
// Variable GEPs will require math and will disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
}
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
--- /dev/null
+; REQUIRES: asserts
+; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @outer([4 x i32]* %ptr, i32 %i) {
+ call void @inner1([4 x i32]* %ptr, i32 %i)
+ call void @inner2([4 x i32]* %ptr, i32 %i)
+ ret void
+}
+; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64.
+; Thus, both the gep and ret can be simplified.
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define void @inner1([4 x i32]* %ptr, i32 %i) {
+ %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i
+ ret void
+}
+
+; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for
+; AArch64. Thus, only the ret can be simplified and not the gep.
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 1
+; CHECK: NumInstructions: 2
+define void @inner2([4 x i32]* %ptr, i32 %i) {
+ %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i
+ ret void
+}
--- /dev/null
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True