From 011a503f25cdbfff68bc28c8ac11f7df346674ef Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 28 Nov 2018 08:08:05 +0000 Subject: [PATCH] [SystemZ::TTI] Improved cost values for comparison against memory. Single instructions exist for i8 and i16 comparisons of memory against a small immediate. This patch makes sure that if the load in these cases has a single user (the ICmp), it gets a 0 cost (folded), and also that the ICmp gets a cost of 1. Review: Ulrich Weigand https://reviews.llvm.org/D54897 llvm-svn: 347733 --- .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 17 +++++++++++++- llvm/test/Analysis/CostModel/SystemZ/cmp-mem.ll | 27 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/CostModel/SystemZ/cmp-mem.ll diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 19e4448..fdb998e 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -835,8 +835,17 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, switch (Opcode) { case Instruction::ICmp: { unsigned Cost = 1; - if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) + if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) { + if (I != nullptr) { + // Single instruction for comparison of memory with a small immediate. + if (const LoadInst* Ld = dyn_cast<LoadInst>(I->getOperand(0))) { + const Instruction *FoldedValue = nullptr; + if (isFoldableLoad(Ld, FoldedValue)) + return Cost; + } + } Cost += 2; // extend both operands + } return Cost; } case Instruction::Select: @@ -932,6 +941,12 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { if (SExtBits || ZExtBits) return false; + // Comparison between memory and immediate. 
+ if (UserI->getOpcode() == Instruction::ICmp) + if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1))) + if (isUInt<16>(CI->getZExtValue())) + return true; + unsigned LoadOrTruncBits = (TruncBits ? TruncBits : LoadedBits); return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64); break; diff --git a/llvm/test/Analysis/CostModel/SystemZ/cmp-mem.ll b/llvm/test/Analysis/CostModel/SystemZ/cmp-mem.ll new file mode 100644 index 0000000..4f92d5b --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/cmp-mem.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Test costs for i8 and i16 comparisons against memory with a small immediate. + +define i32 @fun0(i8* %Src, i8* %Dst, i8 %Val) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun0': +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i8, i8* %Src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Cmp = icmp eq i8 %Ld, 123 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %Ret = zext i1 %Cmp to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %Ret + %Ld = load i8, i8* %Src + %Cmp = icmp eq i8 %Ld, 123 + %Ret = zext i1 %Cmp to i32 + ret i32 %Ret +} + +define i32 @fun1(i16* %Src, i16* %Dst, i16 %Val) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun1': +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i16, i16* %Src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Cmp = icmp eq i16 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %Ret = zext i1 %Cmp to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %Ret + %Ld = load i16, i16* %Src + %Cmp = icmp eq i16 %Ld, 1234 + %Ret = zext i1 %Cmp to i32 + ret i32 %Ret +} -- 2.7.4