From 687e4af1c05ae36af88900d41150e260d8f273c0 Mon Sep 17 00:00:00 2001
From: Amir Ayupov
Date: Mon, 7 Feb 2022 20:16:13 -0800
Subject: [PATCH] [BOLT] CMOVConversion pass
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Convert simple hammocks into cmov based on misprediction rate.

Test Plan:
- Assembly test: `cmov-conversion.s`
- Testing on a binary:
  # Bootstrap clang with `-x86-cmov-converter-force-all` and `-Wl,--emit-relocs` (Release build)
  # Collect perf.data:
    - `clang++ bolt/lib/Core/BinaryFunction.cpp -E > bf.cpp`
    - `perf record -e cycles:u -j any,u -- clang-15 bf.cpp -O2 -std=c++14 -c -o bf.o`
  # Optimize clang-15 with and w/o -cmov-conversion:
    - `llvm-bolt clang-15 -p perf.data -o clang-15.bolt`
    - `llvm-bolt clang-15 -p perf.data -cmov-conversion -o clang-15.bolt.cmovconv`
  # Run perf experiment:
    - test: `clang-15.bolt.cmovconv`,
    - control: `clang-15.bolt`,
    - workload (clang options): `bf.cpp -O2 -std=c++14 -c -o bf.o`

Results:
```
  task-clock [delta: -360.21 ± 356.75, delta(%): -1.7760 ± 1.7589, p-value: 0.047951, balance: -6]
  instructions [delta: 44061118 ± 13246382, delta(%): 0.0690 ± 0.0207, p-value: 0.000001, balance: 50]
  icache-misses [delta: -5534468 ± 2779620, delta(%): -0.4331 ± 0.2175, p-value: 0.028014, balance: -28]
  branch-misses [delta: -1624270 ± 1113244, delta(%): -0.3456 ± 0.2368, p-value: 0.030300, balance: -22]
```

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D120177
---
 bolt/include/bolt/Core/MCPlusBuilder.h    |  10 +
 bolt/include/bolt/Passes/CMOVConversion.h |  92 +++++
 bolt/lib/Passes/CMOVConversion.cpp        | 287 +++++++++++++++
 bolt/lib/Passes/CMakeLists.txt            |   1 +
 bolt/lib/Rewrite/BinaryPassManager.cpp    |   9 +
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp  |  67 ++++
 bolt/test/X86/cmov-conversion.s           | 567 ++++++++++++++++++++++++++++++
 7 files changed, 1033 insertions(+)
 create mode 100644 bolt/include/bolt/Passes/CMOVConversion.h
 create mode 100644 bolt/lib/Passes/CMOVConversion.cpp
 create mode 100644 bolt/test/X86/cmov-conversion.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e6f4222..2378f2b 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1292,6 +1292,16 @@ public:
     return false;
   }
 
+  /// Convert a move instruction into a conditional move instruction, given a
+  /// condition code.
+  virtual bool
+  convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
+                               bool AllowStackMemOp = false,
+                               bool AllowBasePtrStackMemOp = false) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Lower a tail call instruction \p Inst if required by target.
   virtual bool lowerTailCall(MCInst &Inst) {
     llvm_unreachable("not implemented");
diff --git a/bolt/include/bolt/Passes/CMOVConversion.h b/bolt/include/bolt/Passes/CMOVConversion.h
new file mode 100644
index 0000000..77ce223
--- /dev/null
+++ b/bolt/include/bolt/Passes/CMOVConversion.h
@@ -0,0 +1,92 @@
+//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds the following patterns:
+//         jcc
+//        /   \
+// (empty)  mov src, dst
+//        \   /
+//
+// and replaces them with:
+//
+//   cmovcc src, dst
+//
+// The advantage of performing this conversion in BOLT (compared to compiler
+// heuristic driven instruction selection) is that BOLT can use LBR
+// misprediction information and only convert poorly predictable branches.
+// Note that branch misprediction rate is different from branch bias.
+// For well-predictable branches, it might be beneficial to leave jcc+mov as is
+// from microarchitectural perspective to avoid unneeded dependencies (CMOV
+// instruction has a dataflow dependence on flags and both operands).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_CMOVCONVERSION_H
+#define BOLT_PASSES_CMOVCONVERSION_H
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Pass for folding eligible hammocks into CMOV's if profitable.
+class CMOVConversion : public BinaryFunctionPass {
+  struct Stats {
+    /// Record how many possible cases there are.
+    uint64_t StaticPossible = 0;
+    uint64_t DynamicPossible = 0;
+
+    /// Record how many cases were converted.
+    uint64_t StaticPerformed = 0;
+    uint64_t DynamicPerformed = 0;
+
+    /// Record how many mispredictions were eliminated.
+    uint64_t PossibleMP = 0;
+    uint64_t RemovedMP = 0;
+
+    /// Return the member-wise sum; neither operand is modified.
+    Stats operator+(Stats O) const {
+      O.StaticPossible += StaticPossible;
+      O.DynamicPossible += DynamicPossible;
+      O.StaticPerformed += StaticPerformed;
+      O.DynamicPerformed += DynamicPerformed;
+      O.PossibleMP += PossibleMP;
+      O.RemovedMP += RemovedMP;
+      return O;
+    }
+
+    /// Return Num / Denom, or 0 if Denom is zero, so that dump() never prints
+    /// NaN when there was nothing to count.
+    static double ratio(uint64_t Num, uint64_t Denom) {
+      return Denom ? (double)Num / Denom : 0.0;
+    }
+    double getStaticRatio() { return ratio(StaticPerformed, StaticPossible); }
+    double getDynamicRatio() {
+      return ratio(DynamicPerformed, DynamicPossible);
+    }
+    double getMPRatio() { return ratio(RemovedMP, PossibleMP); }
+
+    void dump();
+  };
+  // BinaryContext-wide stats
+  Stats Global;
+
+  void runOnFunction(BinaryFunction &Function);
+
+public:
+  explicit CMOVConversion() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "CMOV conversion"; }
+
+  void runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CMOVConversion.cpp b/bolt/lib/Passes/CMOVConversion.cpp
new file mode 100644
index 0000000..6213479
--- /dev/null
+++ b/bolt/lib/Passes/CMOVConversion.cpp
@@ -0,0 +1,287 @@
+//===- bolt/Passes/CMOVConversion.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CMOV conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/CMOVConversion.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <numeric>
+
+#define DEBUG_TYPE "cmov"
+
+using namespace llvm;
+
+namespace opts {
+
+extern cl::OptionCategory BoltOptCategory;
+
+static cl::opt<int> BiasThreshold(
+    "cmov-conversion-bias-threshold",
+    cl::desc("minimum condition bias (pct) to perform a CMOV conversion, "
+             "-1 to not account bias"),
+    cl::ReallyHidden, cl::init(1), cl::cat(BoltOptCategory));
+
+static cl::opt<int> MispredictionThreshold(
+    "cmov-conversion-misprediction-threshold",
+    cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, "
+             "-1 to not account misprediction rate"),
+    cl::ReallyHidden, cl::init(5), cl::cat(BoltOptCategory));
+
+static cl::opt<bool> ConvertStackMemOperand(
+    "cmov-conversion-convert-stack-mem-operand",
+    cl::desc("convert moves with stack memory operand (potentially unsafe)"),
+    cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
+
+static cl::opt<bool> ConvertBasePtrStackMemOperand(
+    "cmov-conversion-convert-rbp-stack-mem-operand",
+    cl::desc("convert moves with rbp stack memory operand (unsafe, must be off "
+             "for binaries compiled with -fomit-frame-pointer)"),
+    cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
+
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+// Return true if the CFG conforms to the following subgraph:
+// Predecessor
+//   /     \
+//  |     RHS
+//   \     /
+//     LHS
+// Caller guarantees that LHS and RHS share the same predecessor.
+bool isIfThenSubgraph(const BinaryBasicBlock &LHS,
+                      const BinaryBasicBlock &RHS) {
+  if (LHS.pred_size() != 2 || RHS.pred_size() != 1)
+    return false;
+
+  // Sanity check
+  BinaryBasicBlock *Predecessor = *RHS.pred_begin();
+  assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph");
+  (void)Predecessor;
+
+  if (!LHS.isPredecessor(&RHS))
+    return false;
+  if (RHS.succ_size() != 1)
+    return false;
+  return true;
+}
+
+bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc,
+                      BinaryBasicBlock *&UnconditionalSucc,
+                      bool &IsConditionalTaken) {
+  BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(true);
+  BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(false);
+  bool IsIfThenTaken = isIfThenSubgraph(*FallthroughSucc, *TakenSucc);
+  bool IsIfThenFallthrough = isIfThenSubgraph(*TakenSucc, *FallthroughSucc);
+  if (!IsIfThenFallthrough && !IsIfThenTaken)
+    return false;
+  assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph");
+
+  // Output parameters
+  ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc;
+  UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc;
+  IsConditionalTaken = IsIfThenTaken;
+  return true;
+}
+
+// Return true if basic block instructions can be converted into cmov(s).
+bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB,
+                            unsigned CC) {
+  if (BB.empty())
+    return false;
+  const MCInst *LastInst = BB.getLastNonPseudoInstr();
+  // Only pseudo instructions, can't be converted into CMOV
+  if (LastInst == nullptr)
+    return false;
+  for (const MCInst &Inst : BB) {
+    if (BC.MIB->isPseudo(Inst))
+      continue;
+    // Unconditional branch as a last instruction is OK
+    if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst))
+      continue;
+    MCInst Cmov(Inst);
+    // Try the conversion on a copy so that BB itself is left untouched
+    if (!BC.MIB->convertMoveToConditionalMove(
+            Cmov, CC, opts::ConvertStackMemOperand,
+            opts::ConvertBasePtrStackMemOperand)) {
+      LLVM_DEBUG({
+        dbgs() << BB.getName() << ": can't convert instruction ";
+        BC.printInstruction(dbgs(), Cmov);
+      });
+      return false;
+    }
+  }
+  return true;
+}
+
+void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) {
+  for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) {
+    if (BC.MIB->isPseudo(*II))
+      continue;
+    if (BC.MIB->isUnconditionalBranch(*II)) {
+      // XXX: this invalidates II but we return immediately
+      BB.eraseInstruction(II);
+      return;
+    }
+    bool Result = BC.MIB->convertMoveToConditionalMove(
+        *II, CC, opts::ConvertStackMemOperand,
+        opts::ConvertBasePtrStackMemOperand);
+    assert(Result && "unexpected instruction");
+    (void)Result;
+  }
+}
+
+// Returns misprediction rate (pct, -1 if no profile) and misprediction count.
+std::pair<int, uint64_t>
+calculateMispredictionRate(const BinaryBasicBlock &BB) {
+  uint64_t TotalExecCount = 0;
+  uint64_t TotalMispredictionCount = 0;
+  for (const auto &BI : BB.branch_info()) {
+    TotalExecCount += BI.Count;
+    if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED)
+      TotalMispredictionCount += BI.MispredictedCount;
+  }
+  if (!TotalExecCount)
+    return {-1, TotalMispredictionCount};
+  return {100.0f * TotalMispredictionCount / TotalExecCount,
+          TotalMispredictionCount};
+}
+
+// Returns conditional succ bias if the profile is available, -1 otherwise.
+int calculateConditionBias(const BinaryBasicBlock &BB,
+                           const BinaryBasicBlock &ConditionalSucc) {
+  if (auto BranchStats = BB.getBranchStats(&ConditionalSucc))
+    return BranchStats->first;
+  return -1;
+}
+
+void CMOVConversion::Stats::dump() {
+  outs() << "converted static " << StaticPerformed << "/" << StaticPossible
+         << formatv(" ({0:P}) ", getStaticRatio())
+         << "hammock(s) into CMOV sequences, with dynamic execution count "
+         << DynamicPerformed << "/" << DynamicPossible
+         << formatv(" ({0:P}), ", getDynamicRatio()) << "saving " << RemovedMP
+         << "/" << PossibleMP << formatv(" ({0:P}) ", getMPRatio())
+         << "mispredictions\n";
+}
+
+void CMOVConversion::runOnFunction(BinaryFunction &Function) {
+  BinaryContext &BC = Function.getBinaryContext();
+  bool Modified = false;
+  // Function-local stats
+  Stats Local;
+  // Traverse blocks in post-order, merging block with a converted cmov with
+  // its successor.
+  for (BinaryBasicBlock *BB : post_order(&Function)) {
+    uint64_t BBExecCount = BB->getKnownExecutionCount();
+    if (BB->empty() ||          // The block must have instructions
+        BBExecCount == 0 ||     // must be hot
+        BB->succ_size() != 2 || // with two successors
+        BB->hasJumpTable())     // no jump table
+      continue;
+
+    assert(BB->isValid() && "traversal internal error");
+
+    // Check branch instruction
+    auto BranchInstrIter = BB->getLastNonPseudo();
+    if (BranchInstrIter == BB->rend() ||
+        !BC.MIB->isConditionalBranch(*BranchInstrIter))
+      continue;
+
+    // Check successors
+    BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc;
+    bool IsConditionalTaken;
+    if (!matchCFGSubgraph(*BB, ConditionalSucc, UnconditionalSucc,
+                          IsConditionalTaken)) {
+      LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n");
+      continue;
+    }
+
+    unsigned CC = BC.MIB->getCondCode(*BranchInstrIter);
+    if (!IsConditionalTaken)
+      CC = BC.MIB->getInvertedCondCode(CC);
+    // Check contents of the conditional block
+    if (!canConvertInstructions(BC, *ConditionalSucc, CC))
+      continue;
+
+    int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc);
+    int MispredictionRate = 0;
+    uint64_t MispredictionCount = 0;
+    std::tie(MispredictionRate, MispredictionCount) =
+        calculateMispredictionRate(*BB);
+
+    Local.StaticPossible++;
+    Local.DynamicPossible += BBExecCount;
+    Local.PossibleMP += MispredictionCount;
+
+    // If the conditional successor is never executed, don't convert it
+    if (ConditionBias < opts::BiasThreshold) {
+      LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName()
+                        << " bias = " << ConditionBias
+                        << ", less than threshold " << opts::BiasThreshold
+                        << '\n');
+      continue;
+    }
+
+    // Check the misprediction rate of a branch
+    if (MispredictionRate < opts::MispredictionThreshold) {
+      LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = "
+                        << MispredictionRate << ", less than threshold "
+                        << opts::MispredictionThreshold << '\n');
+      continue;
+    }
+
+    // remove conditional branch
+    BB->eraseInstruction(std::prev(BranchInstrIter.base()));
+    BB->removeAllSuccessors();
+    // Convert instructions from the conditional successor into cmov's in BB.
+    convertMoves(BC, *ConditionalSucc, CC);
+    BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end());
+    ConditionalSucc->markValid(false);
+
+    // Post-order traversal guarantees that the successor is already visited
+    // and merged if necessary. Merge the unconditional successor into BB.
+    BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end());
+    UnconditionalSucc->moveAllSuccessorsTo(BB);
+    UnconditionalSucc->markValid(false);
+    Local.StaticPerformed++;
+    Local.DynamicPerformed += BBExecCount;
+    Local.RemovedMP += MispredictionCount;
+    Modified = true;
+  }
+  if (Modified)
+    Function.eraseInvalidBBs();
+  if (opts::Verbosity > 1) {
+    outs() << "BOLT-INFO: CMOVConversion: " << Function << ", ";
+    Local.dump();
+  }
+  Global = Global + Local;
+}
+
+void CMOVConversion::runOnFunctions(BinaryContext &BC) {
+  for (auto &It : BC.getBinaryFunctions()) {
+    BinaryFunction &Function = It.second;
+    if (!shouldOptimize(Function))
+      continue;
+    runOnFunction(Function);
+  }
+
+  outs() << "BOLT-INFO: CMOVConversion total: ";
+  Global.dump();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index c97f203..95d872d 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMBOLTPasses
   AsmDump.cpp
   BinaryPasses.cpp
   BinaryFunctionCallGraph.cpp
+  CMOVConversion.cpp
   CacheMetrics.cpp
   CallGraph.cpp
   CallGraphWalker.cpp
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 199ed5f..929b734 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -11,6 +11,7 @@
 #include "bolt/Passes/Aligner.h"
 #include "bolt/Passes/AllocCombiner.h"
 #include "bolt/Passes/AsmDump.h"
+#include "bolt/Passes/CMOVConversion.h"
 #include "bolt/Passes/FrameOptimizer.h"
 #include "bolt/Passes/IdenticalCodeFolding.h"
 #include "bolt/Passes/IndirectCallPromotion.h"
@@ -247,6 +248,11 @@ ThreeWayBranchFlag("three-way-branch",
   cl::desc("reorder three way branches"),
   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
 
+static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
+                                        cl::desc("fold jcc+mov into cmov"),
+                                        cl::ZeroOrMore, cl::ReallyHidden,
+                                        cl::cat(BoltOptCategory));
+
 } // namespace opts
 
 namespace llvm {
@@ -393,6 +399,9 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   Manager.registerPass(std::make_unique<TailDuplication>(),
                        opts::TailDuplicationFlag);
 
+  Manager.registerPass(std::make_unique<CMOVConversion>(),
+                       opts::CMOVConversionFlag);
+
   // This pass syncs local branches with CFG. If any of the following
   // passes breaks the sync - they either need to re-run the pass or
   // fix branches consistency internally.
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index aca88e5..fa6f370 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -12,12 +12,15 @@
 
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "bolt/Core/MCPlus.h"
 #include "bolt/Core/MCPlusBuilder.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegister.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Debug.h"
@@ -2134,6 +2137,70 @@ public:
     return true;
   }
 
+  bool
+  convertMoveToConditionalMove(MCInst &Inst, unsigned CC, bool AllowStackMemOp,
+                               bool AllowBasePtrStackMemOp) const override {
+    // - Register-register moves are OK
+    // - Stores are filtered out by opcode (no store CMOV)
+    // - Non-stack loads are prohibited (generally unsafe)
+    // - Stack loads are OK if AllowStackMemOp is true
+    // - Stack loads with RBP are OK if AllowBasePtrStackMemOp is true
+    if (isLoad(Inst)) {
+      // If stack memory operands are not allowed, no loads are allowed
+      if (!AllowStackMemOp)
+        return false;
+
+      // If stack memory operands are allowed, check if it's a load from stack
+      bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
+      MCPhysReg Reg;
+      int32_t SrcImm;
+      uint16_t StackPtrReg;
+      int64_t StackOffset;
+      uint8_t Size;
+      bool IsStackAccess =
+          isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
+                        StackPtrReg, StackOffset, Size, IsSimple, IsIndexed);
+      // Prohibit non-stack-based loads
+      if (!IsStackAccess)
+        return false;
+      // Prohibit RBP-based stack loads unless explicitly allowed
+      if (!AllowBasePtrStackMemOp &&
+          RegInfo->isSubRegisterEq(X86::RBP, StackPtrReg))
+        return false;
+    }
+
+    unsigned NewOpcode = 0;
+    switch (Inst.getOpcode()) {
+    case X86::MOV16rr:
+      NewOpcode = X86::CMOV16rr;
+      break;
+    case X86::MOV16rm:
+      NewOpcode = X86::CMOV16rm;
+      break;
+    case X86::MOV32rr:
+      NewOpcode = X86::CMOV32rr;
+      break;
+    case X86::MOV32rm:
+      NewOpcode = X86::CMOV32rm;
+      break;
+    case X86::MOV64rr:
+      NewOpcode = X86::CMOV64rr;
+      break;
+    case X86::MOV64rm:
+      NewOpcode = X86::CMOV64rm;
+      break;
+    default:
+      return false;
+    }
+    Inst.setOpcode(NewOpcode);
+    // Insert CC at the end of prime operands, before annotations
+    Inst.insert(Inst.begin() + MCPlus::getNumPrimeOperands(Inst),
+                MCOperand::createImm(CC));
+    // CMOV is a 3-operand MCInst, so duplicate the destination as src1
+    Inst.insert(Inst.begin(), Inst.getOperand(0));
+    return true;
+  }
+
   bool lowerTailCall(MCInst &Inst) override {
     if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) {
       Inst.setOpcode(X86::JMP_1);
diff --git a/bolt/test/X86/cmov-conversion.s b/bolt/test/X86/cmov-conversion.s
new file mode 100644
index 0000000..b876600
--- /dev/null
+++ b/bolt/test/X86/cmov-conversion.s
@@ -0,0 +1,567 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -data %t.fdata -o %t -lite=0 -v=2 \
+# RUN:   -cmov-conversion -cmov-conversion-misprediction-threshold=-1 \
+# RUN:   -cmov-conversion-bias-threshold=-1 -print-all | FileCheck %s
+# CHECK: BOLT-INFO: CMOVConversion: CmovInHotPath, 
converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: CmovNotInHotPath, converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: MaxIndex, converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: MaxIndex_unpredictable, converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: MaxValue, converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: BinarySearch, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: Transform, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_unpredictable, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group2, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_conflicting_dir, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr2, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr3, converted static 0/0 +# CHECK: BOLT-INFO: CMOVConversion: test_memoperand_loop, converted static 1/1 +# CHECK: BOLT-INFO: CMOVConversion: CmovBackToBack, converted static 2/2 +# CHECK: BOLT-INFO: CMOVConversion total: converted static 8/8 + + .globl _start +_start: + .globl CmovInHotPath # -- Begin function CmovInHotPath + .p2align 4, 0x90 + .type CmovInHotPath,@function +CmovInHotPath: # @CmovInHotPath +# CHECK-LABEL: Binary Function "CmovInHotPath" after CMOV conversion +# FDATA: 0 [unknown] 0 1 CmovInHotPath 0 1 2 + .cfi_startproc +# %bb.0: # %entry + testl %edi, %edi + jle LBB0_5 +# %bb.1: # %for.body.preheader + movl %edi, %r8d + xorl %edi, %edi +# FDATA: 0 [unknown] 0 1 CmovInHotPath #LBB0_2# 1 2 +LBB0_2: # %for.body + movl (%rcx,%rdi,4), %eax + leal 1(%rax), %r9d + imull %esi, %eax + 
movl $10, %r10d + cmpl %edx, %eax +# CHECK: cmpl %edx, %eax +# CHECK-NEXT: cmovlel %r9d, %r10d +LBB0_2_br: + jg LBB0_4 +# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_3# 1 2 +# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_4# 1 2 +# %bb.3: # %for.body +LBB0_3: + movl %r9d, %r10d +LBB0_4: # %for.body + imull %r9d, %r10d + movl %r10d, (%rcx,%rdi,4) + addq $1, %rdi + cmpq %rdi, %r8 + jne LBB0_2 +LBB0_5: # %for.cond.cleanup + retq +Lfunc_end0: + .size CmovInHotPath, Lfunc_end0-CmovInHotPath + .cfi_endproc + # -- End function + .globl CmovNotInHotPath # -- Begin function CmovNotInHotPath + .p2align 4, 0x90 + .type CmovNotInHotPath,@function +CmovNotInHotPath: # @CmovNotInHotPath +# CHECK-LABEL: Binary Function "CmovNotInHotPath" after CMOV conversion +# FDATA: 0 [unknown] 0 1 CmovNotInHotPath 0 1 2 + .cfi_startproc +# %bb.0: # %entry + testl %edi, %edi + jle LBB1_5 +# %bb.1: # %for.body.preheader + movl %edx, %r9d + movl %edi, %r10d + xorl %edi, %edi +# FDATA: 0 [unknown] 0 1 CmovNotInHotPath #LBB1_2# 1 2 +LBB1_2: # %for.body + movl (%rcx,%rdi,4), %r11d + movl %r11d, %eax + imull %esi, %eax + movl $10, %edx + cmpl %r9d, %eax +# CHECK: cmpl %r9d, %eax +# CHECK-NEXT: cmovlel %r11d, %edx +LBB1_4_br: + jg LBB1_4 +# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_3# 1 2 +# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_4# 1 2 +# %bb.3: # %for.body +LBB1_3: + movl %r11d, %edx +LBB1_4: # %for.body + movl %edx, (%rcx,%rdi,4) + movl (%r8,%rdi,4), %eax + cltd + idivl %r9d + movl %eax, (%r8,%rdi,4) + addq $1, %rdi + cmpq %rdi, %r10 + jne LBB1_2 +LBB1_5: # %for.cond.cleanup + retq +Lfunc_end1: + .size CmovNotInHotPath, Lfunc_end1-CmovNotInHotPath + .cfi_endproc + # -- End function + .globl MaxIndex # -- Begin function MaxIndex + .p2align 4, 0x90 + .type MaxIndex,@function +MaxIndex: # @MaxIndex +# CHECK-LABEL: Binary Function "MaxIndex" after CMOV conversion +# FDATA: 0 [unknown] 0 1 MaxIndex 0 1 2 + .cfi_startproc +# %bb.0: # 
%entry + xorl %eax, %eax + cmpl $2, %edi + jl LBB2_5 +# %bb.1: # %for.body.preheader + movl %edi, %r8d + xorl %edi, %edi + movl $1, %edx +# FDATA: 0 [unknown] 0 1 MaxIndex #LBB2_2# 1 2 +LBB2_2: # %for.body + movl (%rsi,%rdx,4), %r9d + movslq %edi, %rcx + movl %edx, %eax + cmpl (%rsi,%rcx,4), %r9d +# CHECK: cmpl (%rsi,%rcx,4), %r9d +# CHECK-NEXT: cmovlel %edi, %eax +LBB2_2_br: + jg LBB2_4 +# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_3# 1 2 +# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_4# 1 2 +# %bb.3: # %for.body +LBB2_3: + movl %edi, %eax +LBB2_4: # %for.body + addq $1, %rdx + movl %eax, %edi + cmpq %rdx, %r8 + jne LBB2_2 +LBB2_5: # %for.cond.cleanup + retq +Lfunc_end2: + .size MaxIndex, Lfunc_end2-MaxIndex + .cfi_endproc + # -- End function + .globl MaxIndex_unpredictable # -- Begin function MaxIndex_unpredictable + .p2align 4, 0x90 + .type MaxIndex_unpredictable,@function +MaxIndex_unpredictable: # @MaxIndex_unpredictable +# CHECK-LABEL: Binary Function "MaxIndex_unpredictable" after CMOV conversion +# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable 0 1 2 + .cfi_startproc +# %bb.0: # %entry + xorl %eax, %eax + cmpl $2, %edi + jl LBB3_5 +# %bb.1: # %for.body.preheader + movl %edi, %r8d + xorl %edi, %edi + movl $1, %edx +# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable #LBB3_2# 1 2 +LBB3_2: # %for.body + movl (%rsi,%rdx,4), %r9d + movslq %edi, %rcx + movl %edx, %eax + cmpl (%rsi,%rcx,4), %r9d +# CHECK: cmpl (%rsi,%rcx,4), %r9d +# CHECK-NEXT: cmovlel %edi, %eax +LBB3_2_br: + jg LBB3_4 +# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_3# 1 2 +# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_4# 1 2 +# %bb.3: # %for.body +LBB3_3: + movl %edi, %eax +LBB3_4: # %for.body + addq $1, %rdx + movl %eax, %edi + cmpq %rdx, %r8 + jne LBB3_2 +LBB3_5: # %for.cond.cleanup + retq +Lfunc_end3: + .size MaxIndex_unpredictable, Lfunc_end3-MaxIndex_unpredictable + .cfi_endproc + # -- End function + .globl MaxValue # -- Begin 
function MaxValue + .p2align 4, 0x90 + .type MaxValue,@function +MaxValue: # @MaxValue +# CHECK-LABEL: Binary Function "MaxValue" after CMOV conversion +# FDATA: 0 [unknown] 0 1 MaxValue 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl (%rsi), %ecx + cmpl $2, %edi + jge LBB4_3 +# %bb.1: +LBB4_1: + movl %ecx, %eax +LBB4_2: # %for.cond.cleanup + retq +LBB4_3: # %for.body.preheader + movl %edi, %edi + movl $1, %edx +LBB4_4: # %for.body + movl (%rsi,%rdx,4), %eax + cmpl %ecx, %eax +# CHECK: cmpl %ecx, %eax +# CHECK-NEXT: cmovlel %ecx, %eax +LBB4_4_br: + jg LBB4_6 +# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_5# 1 2 +# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_6# 1 2 +# %bb.5: # %for.body +LBB4_5: + movl %ecx, %eax +LBB4_6: # %for.body + addq $1, %rdx + movl %eax, %ecx + cmpq %rdx, %rdi + je LBB4_2 + jmp LBB4_4 +Lfunc_end4: + .size MaxValue, Lfunc_end4-MaxValue + .cfi_endproc + # -- End function + .globl BinarySearch # -- Begin function BinarySearch + .p2align 4, 0x90 + .type BinarySearch,@function +BinarySearch: # @BinarySearch +# CHECK-LABEL: Binary Function "BinarySearch" after CMOV conversion +# FDATA: 0 [unknown] 0 1 BinarySearch 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl (%rsi), %eax + jmp LBB5_2 +LBB5_1: # %while.body + movl %ecx, %eax + xorl %ecx, %ecx + btl %eax, %edi + setae %cl + movq 8(%rdx,%rcx,8), %rdx +LBB5_2: # %while.body + movl (%rdx), %ecx + cmpl %ecx, %eax + ja LBB5_1 +# %bb.3: # %while.end + retq +Lfunc_end5: + .size BinarySearch, Lfunc_end5-BinarySearch + .cfi_endproc + # -- End function + .globl Transform # -- Begin function Transform + .p2align 4, 0x90 + .type Transform,@function +Transform: # @Transform +# CHECK-LABEL: Binary Function "Transform" after CMOV conversion +# FDATA: 0 [unknown] 0 1 Transform 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movb $1, %al + testb %al, %al + jne LBB6_5 +# %bb.1: # %while.body.preheader + movl %edx, %r8d + xorl %esi, %esi +LBB6_2: # %while.body + movslq %esi, %rsi + movl (%rdi,%rsi,4), %eax + 
xorl %edx, %edx + divl %r8d + movl %eax, %edx + movl $11, %eax + movl %r8d, %ecx + cmpl %r8d, %edx + ja LBB6_4 +# %bb.3: # %while.body + movl $22, %eax + movl $22, %ecx +LBB6_4: # %while.body + xorl %edx, %edx + divl %ecx + movl %edx, (%rdi,%rsi,4) + addl $1, %esi + cmpl %r9d, %esi + ja LBB6_2 +LBB6_5: # %while.end + retq +Lfunc_end6: + .size Transform, Lfunc_end6-Transform + .cfi_endproc + # -- End function + .globl test_cmov_memoperand # -- Begin function test_cmov_memoperand + .p2align 4, 0x90 + .type test_cmov_memoperand,@function +test_cmov_memoperand: # @test_cmov_memoperand +# CHECK-LABEL: Binary Function "test_cmov_memoperand" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edx, %eax + cmpl %esi, %edi + ja LBB7_2 +# %bb.1: # %entry + movl (%rcx), %eax +LBB7_2: # %entry + retq +Lfunc_end7: + .size test_cmov_memoperand, Lfunc_end7-test_cmov_memoperand + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_unpredictable # -- Begin function test_cmov_memoperand_unpredictable + .p2align 4, 0x90 + .type test_cmov_memoperand_unpredictable,@function +test_cmov_memoperand_unpredictable: # @test_cmov_memoperand_unpredictable +# CHECK-LABEL: Binary Function "test_cmov_memoperand_unpredictable" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_unpredictable 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edx, %eax + cmpl %esi, %edi + ja LBB8_2 +# %bb.1: # %entry + movl (%rcx), %eax +LBB8_2: # %entry + retq +Lfunc_end8: + .size test_cmov_memoperand_unpredictable, Lfunc_end8-test_cmov_memoperand_unpredictable + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_in_group # -- Begin function test_cmov_memoperand_in_group + .p2align 4, 0x90 + .type test_cmov_memoperand_in_group,@function +test_cmov_memoperand_in_group: # @test_cmov_memoperand_in_group +# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group" after CMOV conversion +# FDATA: 0 [unknown] 0 1 
test_cmov_memoperand_in_group 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edx, %eax + movl %edx, %r8d + cmpl %esi, %edi + ja LBB9_2 +# %bb.1: # %entry + movl (%rcx), %r8d + movl %edi, %eax + movl %esi, %edx +LBB9_2: # %entry + addl %r8d, %eax + addl %edx, %eax + retq +Lfunc_end9: + .size test_cmov_memoperand_in_group, Lfunc_end9-test_cmov_memoperand_in_group + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_in_group2 # -- Begin function test_cmov_memoperand_in_group2 + .p2align 4, 0x90 + .type test_cmov_memoperand_in_group2,@function +test_cmov_memoperand_in_group2: # @test_cmov_memoperand_in_group2 +# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group2" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group2 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edx, %eax + movl %edx, %r8d + cmpl %esi, %edi + jbe LBB10_2 +# %bb.1: # %entry + movl (%rcx), %r8d + movl %edi, %eax + movl %esi, %edx +LBB10_2: # %entry + addl %r8d, %eax + addl %edx, %eax + retq +Lfunc_end10: + .size test_cmov_memoperand_in_group2, Lfunc_end10-test_cmov_memoperand_in_group2 + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_conflicting_dir # -- Begin function test_cmov_memoperand_conflicting_dir + .p2align 4, 0x90 + .type test_cmov_memoperand_conflicting_dir,@function +test_cmov_memoperand_conflicting_dir: # @test_cmov_memoperand_conflicting_dir +# CHECK-LABEL: Binary Function "test_cmov_memoperand_conflicting_dir" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_conflicting_dir 0 1 2 + .cfi_startproc +# %bb.0: # %entry + cmpl %esi, %edi + movl (%rcx), %eax + cmoval %edx, %eax + cmoval (%r8), %edx + addl %edx, %eax + retq +Lfunc_end11: + .size test_cmov_memoperand_conflicting_dir, Lfunc_end11-test_cmov_memoperand_conflicting_dir + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_in_group_reuse_for_addr # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr + .p2align 4, 0x90 + .type 
test_cmov_memoperand_in_group_reuse_for_addr,@function +test_cmov_memoperand_in_group_reuse_for_addr: # @test_cmov_memoperand_in_group_reuse_for_addr +# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edi, %eax + cmpl %esi, %edi + ja LBB12_2 +# %bb.1: # %entry + movl (%rcx), %eax +LBB12_2: # %entry + retq +Lfunc_end12: + .size test_cmov_memoperand_in_group_reuse_for_addr, Lfunc_end12-test_cmov_memoperand_in_group_reuse_for_addr + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_in_group_reuse_for_addr2 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr2 + .p2align 4, 0x90 + .type test_cmov_memoperand_in_group_reuse_for_addr2,@function +test_cmov_memoperand_in_group_reuse_for_addr2: # @test_cmov_memoperand_in_group_reuse_for_addr2 +# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr2" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr2 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edi, %eax + cmpl %esi, %edi + ja LBB13_2 +# %bb.1: # %entry + movq (%rcx), %rax + movl (%rax), %eax +LBB13_2: # %entry + retq +Lfunc_end13: + .size test_cmov_memoperand_in_group_reuse_for_addr2, Lfunc_end13-test_cmov_memoperand_in_group_reuse_for_addr2 + .cfi_endproc + # -- End function + .globl test_cmov_memoperand_in_group_reuse_for_addr3 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr3 + .p2align 4, 0x90 + .type test_cmov_memoperand_in_group_reuse_for_addr3,@function +test_cmov_memoperand_in_group_reuse_for_addr3: # @test_cmov_memoperand_in_group_reuse_for_addr3 +# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr3" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr3 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movl %edi, %eax + cmpl %esi, 
%edi + ja LBB14_2 +# %bb.1: # %entry + movl (%rcx), %eax +LBB14_2: # %entry + retq +Lfunc_end14: + .size test_cmov_memoperand_in_group_reuse_for_addr3, Lfunc_end14-test_cmov_memoperand_in_group_reuse_for_addr3 + .cfi_endproc + # -- End function + .globl test_memoperand_loop # -- Begin function test_memoperand_loop + .p2align 4, 0x90 + .type test_memoperand_loop,@function +test_memoperand_loop: # @test_memoperand_loop +# CHECK-LABEL: Binary Function "test_memoperand_loop" after CMOV conversion +# FDATA: 0 [unknown] 0 1 test_memoperand_loop 0 1 2 + .cfi_startproc +# %bb.0: # %entry + movq begin@GOTPCREL(%rip), %r8 + movq (%r8), %rax + movq end@GOTPCREL(%rip), %rcx + movq (%rcx), %rdx + xorl %esi, %esi + movq %rax, %rcx +LBB15_1: # %loop.body + addq $8, %rcx + cmpq %rdx, %rcx + ja LBB15_3 +# %bb.2: # %loop.body + movq (%r8), %rcx +LBB15_3: # %loop.body + movl %edi, (%rcx) + addq $8, %rcx + cmpq %rdx, %rcx +# CHECK: movl %edi, (%rcx) +# CHECK-NEXT: addq $0x8, %rcx +# CHECK-NEXT: cmpq %rdx, %rcx +# CHECK-NEXT: cmovbeq %rax, %rcx +LBB15_3_br: + ja LBB15_5 +# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_4# 1 2 +# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_5# 1 2 +# %bb.4: # %loop.body +LBB15_4: + movq %rax, %rcx +LBB15_5: # %loop.body + movl %edi, (%rcx) + addl $1, %esi + cmpl $1024, %esi # imm = 0x400 + jl LBB15_1 +# %bb.6: # %exit + retq +Lfunc_end15: + .size test_memoperand_loop, Lfunc_end15-test_memoperand_loop + .cfi_endproc + # -- End function + .globl CmovBackToBack # -- Begin function CmovBackToBack + .p2align 4, 0x90 + .type CmovBackToBack,@function +CmovBackToBack: # @CmovBackToBack +# CHECK-LABEL: Binary Function "CmovBackToBack" after CMOV conversion +# FDATA: 0 [unknown] 0 1 CmovBackToBack 0 1 2 + .cfi_startproc + testl %edi, %edi + jle LBB16_5 + movl %edi, %r8d + xorl %edi, %edi +# FDATA: 0 [unknown] 0 1 CmovBackToBack #LBB16_2# 1 2 +LBB16_2: # %for.body + movl (%rcx,%rdi,4), %eax + leal 1(%rax), %r9d 
+ imull %esi, %eax + movl $10, %r10d + cmpl %edx, %eax +# CHECK: cmpl %edx, %eax +# CHECK-NEXT: cmovlel %r9d, %r10d +LBB16_2_br: + jg LBB16_4 +# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_3# 1 2 +# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_4# 1 2 +LBB16_3: + movl %r9d, %r10d +LBB16_4: # %for.body +# CHECK-NEXT: cmovlel %r9d, %r10d +LBB16_6_br: + jg LBB16_8 +# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_7# 1 2 +# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_8# 1 2 +LBB16_7: + movl %r9d, %r10d +LBB16_8: # %for.body + imull %r9d, %r10d + movl %r10d, (%rcx,%rdi,4) + addq $1, %rdi + cmpq %rdi, %r8 + jne LBB16_2 +LBB16_5: # %for.cond.cleanup + retq +Lfunc_end16: + .size CmovBackToBack, Lfunc_end16-CmovBackToBack + .cfi_endproc + # -- End function + .data + .globl begin +begin: + .quad 0xdeadbeef + .globl end +end: + .quad 0xfaceb00c + -- 2.7.4