/// blocks may contain out of date or incorrect information.
bool IsValid{true};
+ /// Last computed hash value.
+ mutable uint64_t Hash{0};
+
private:
BinaryBasicBlock() = delete;
BinaryBasicBlock(const BinaryBasicBlock &) = delete;
/// Check if the block has a jump table instruction.
bool hasJumpTable() const { return getJumpTable() != nullptr; }
+ /// Returns the last computed hash value of the block.
+ uint64_t getHash() const { return Hash; }
+
private:
void adjustNumPseudos(const MCInst &Inst, int Sign);
/// Set the index of this basic block.
void setIndex(unsigned I) { Index = I; }
+ /// Sets the hash value of the basic block.
+ void setHash(uint64_t Value) const { Hash = Value; }
+
template <typename T> void clearList(T &List) {
T TempList;
TempList.swap(List);
return std::string();
}) const;
+ /// Compute hash values for each block of the function.
+ void computeBlockHashes() const;
+
void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
/// Return DWARF compile unit for this function.
--- /dev/null
+//===- bolt/Core/HashUtilities.h - Misc hash utilities --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions for computing hash values over BinaryFunction
+// and BinaryBasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_HASH_UTILITIES_H
+#define BOLT_CORE_HASH_UTILITIES_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryContext.h"
+
+namespace llvm {
+namespace bolt {
+
+uint16_t hash_64_to_16(const uint64_t Hash);
+
+std::string hashInteger(uint64_t Value);
+
+std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol);
+
+std::string hashExpr(BinaryContext &BC, const MCExpr &Expr);
+
+std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand);
+
+using OperandHashFuncTy = function_ref<typename std::string(const MCOperand &)>;
+
+std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
+ OperandHashFuncTy OperandHashFunc);
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
+ virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
+ llvm_unreachable("not implemented");
+ return 0;
+ }
+
/// Create increment contents of target by 1 for Instrumentation
virtual InstructionListType createInstrIncMemory(const MCSymbol *Target,
MCContext *Ctx,
static void mapping(IO &YamlIO, bolt::BinaryBasicBlockProfile &BBP) {
YamlIO.mapRequired("bid", BBP.Index);
YamlIO.mapRequired("insns", BBP.NumInstructions);
+ YamlIO.mapOptional("hash", BBP.Hash, (llvm::yaml::Hex64)0);
YamlIO.mapOptional("exec", BBP.ExecCount, (uint64_t)0);
YamlIO.mapOptional("events", BBP.EventCount, (uint64_t)0);
YamlIO.mapOptional("calls", BBP.CallSites,
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryDomTree.h"
#include "bolt/Core/DynoStats.h"
+#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/NameShortener.h"
// The hash is computed by creating a string of all instruction opcodes and
// possibly their operands and then hashing that string with std::hash.
std::string HashString;
- for (const BinaryBasicBlock *BB : Order) {
- for (const MCInst &Inst : *BB) {
- unsigned Opcode = Inst.getOpcode();
-
- if (BC.MIB->isPseudo(Inst))
- continue;
+ for (const BinaryBasicBlock *BB : Order)
+ HashString.append(hashBlock(BC, *BB, OperandHashFunc));
- // Ignore unconditional jumps since we check CFG consistency by processing
- // basic blocks in order and do not rely on branches to be in-sync with
- // CFG. Note that we still use condition code of conditional jumps.
- if (BC.MIB->isUnconditionalBranch(Inst))
- continue;
-
- if (Opcode == 0)
- HashString.push_back(0);
-
- while (Opcode) {
- uint8_t LSB = Opcode & 0xff;
- HashString.push_back(LSB);
- Opcode = Opcode >> 8;
- }
+ return Hash = std::hash<std::string>{}(HashString);
+}
- for (const MCOperand &Op : MCPlus::primeOperands(Inst))
- HashString.append(OperandHashFunc(Op));
- }
+void BinaryFunction::computeBlockHashes() const {
+ for (const BinaryBasicBlock *BB : BasicBlocks) {
+ std::string Hash =
+ hashBlock(BC, *BB, [](const MCOperand &Op) { return std::string(); });
+ BB->setHash(std::hash<std::string>{}(Hash));
}
-
- return Hash = std::hash<std::string>{}(HashString);
}
void BinaryFunction::insertBasicBlocks(
DynoStats.cpp
Exceptions.cpp
FunctionLayout.cpp
+ HashUtilities.cpp
JumpTable.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
--- /dev/null
+//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Computation of hash values over BinaryFunction and BinaryBasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/HashUtilities.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Hashing a 64-bit integer to a 16-bit one.
+uint16_t hash_64_to_16(const uint64_t Hash) {
+ uint16_t Res = (uint16_t)(Hash & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 16) & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 32) & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 48) & 0xFFFF);
+ return Res;
+}
+
+std::string hashInteger(uint64_t Value) {
+ std::string HashString;
+ if (Value == 0)
+ HashString.push_back(0);
+
+ while (Value) {
+ uint8_t LSB = Value & 0xff;
+ HashString.push_back(LSB);
+ Value >>= 8;
+ }
+
+ return HashString;
+}
+
+std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
+ std::string HashString;
+
+ // Ignore function references.
+ if (BC.getFunctionForSymbol(&Symbol))
+ return HashString;
+
+ llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
+ if (!ErrorOrValue)
+ return HashString;
+
+ // Ignore jump table references.
+ if (BC.getJumpTableContainingAddress(*ErrorOrValue))
+ return HashString;
+
+ return HashString.append(hashInteger(*ErrorOrValue));
+}
+
+std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
+ switch (Expr.getKind()) {
+ case MCExpr::Constant:
+ return hashInteger(cast<MCConstantExpr>(Expr).getValue());
+ case MCExpr::SymbolRef:
+ return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
+ case MCExpr::Unary: {
+ const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
+ return hashInteger(UnaryExpr.getOpcode())
+ .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
+ }
+ case MCExpr::Binary: {
+ const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
+ return hashExpr(BC, *BinaryExpr.getLHS())
+ .append(hashInteger(BinaryExpr.getOpcode()))
+ .append(hashExpr(BC, *BinaryExpr.getRHS()));
+ }
+ case MCExpr::Target:
+ return std::string();
+ }
+
+ llvm_unreachable("invalid expression kind");
+}
+
+std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
+ if (Operand.isImm())
+ return hashInteger(Operand.getImm());
+ if (Operand.isReg())
+ return hashInteger(Operand.getReg());
+ if (Operand.isExpr())
+ return hashExpr(BC, *Operand.getExpr());
+
+ return std::string();
+}
+
+std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
+ OperandHashFuncTy OperandHashFunc) {
+ const bool IsX86 = BC.isX86();
+
+ // The hash is computed by creating a string of all instruction opcodes and
+ // possibly their operands and then hashing that string with std::hash.
+ std::string HashString;
+
+ for (const MCInst &Inst : BB) {
+ if (BC.MIB->isPseudo(Inst))
+ continue;
+
+ unsigned Opcode = Inst.getOpcode();
+
+ // Ignore unconditional jumps since we check CFG consistency by processing
+ // basic blocks in order and do not rely on branches to be in-sync with
+ // CFG. Note that we still use condition code of conditional jumps.
+ if (BC.MIB->isUnconditionalBranch(Inst))
+ continue;
+
+ if (IsX86 && BC.MIB->isConditionalBranch(Inst))
+ Opcode = BC.MIB->getShortBranchOpcode(Opcode);
+
+ if (Opcode == 0)
+ HashString.push_back(0);
+
+ while (Opcode) {
+ uint8_t LSB = Opcode & 0xff;
+ HashString.push_back(LSB);
+ Opcode = Opcode >> 8;
+ }
+
+ for (const MCOperand &Op : MCPlus::primeOperands(Inst))
+ HashString.append(OperandHashFunc(Op));
+ }
+ return HashString;
+}
+
+} // namespace bolt
+} // namespace llvm
//===----------------------------------------------------------------------===//
#include "bolt/Passes/IdenticalCodeFolding.h"
+#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
KeyHash, KeyEqual>
IdenticalBucketsMap;
-static std::string hashInteger(uint64_t Value) {
- std::string HashString;
- if (Value == 0)
- HashString.push_back(0);
-
- while (Value) {
- uint8_t LSB = Value & 0xff;
- HashString.push_back(LSB);
- Value >>= 8;
- }
-
- return HashString;
-}
-
-static std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
- std::string HashString;
-
- // Ignore function references.
- if (BC.getFunctionForSymbol(&Symbol))
- return HashString;
-
- llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
- if (!ErrorOrValue)
- return HashString;
-
- // Ignore jump table references.
- if (BC.getJumpTableContainingAddress(*ErrorOrValue))
- return HashString;
-
- return HashString.append(hashInteger(*ErrorOrValue));
-}
-
-static std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
- switch (Expr.getKind()) {
- case MCExpr::Constant:
- return hashInteger(cast<MCConstantExpr>(Expr).getValue());
- case MCExpr::SymbolRef:
- return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
- case MCExpr::Unary: {
- const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
- return hashInteger(UnaryExpr.getOpcode())
- .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
- }
- case MCExpr::Binary: {
- const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
- return hashExpr(BC, *BinaryExpr.getLHS())
- .append(hashInteger(BinaryExpr.getOpcode()))
- .append(hashExpr(BC, *BinaryExpr.getRHS()));
- }
- case MCExpr::Target:
- return std::string();
- }
-
- llvm_unreachable("invalid expression kind");
-}
-
-static std::string hashInstOperand(BinaryContext &BC,
- const MCOperand &Operand) {
- if (Operand.isImm())
- return hashInteger(Operand.getImm());
- if (Operand.isReg())
- return hashInteger(Operand.getReg());
- if (Operand.isExpr())
- return hashExpr(BC, *Operand.getExpr());
-
- return std::string();
-}
-
namespace llvm {
namespace bolt {
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
+ // Prepare function and block hashes
+ BF.computeHash(/*UseDFS=*/true);
+ BF.computeBlockHashes();
+
YamlBF.Name = BF.getPrintName();
YamlBF.Id = BF.getFunctionNumber();
- YamlBF.Hash = BF.computeHash(/*UseDFS=*/true);
+ YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
YamlBB.NumInstructions = BB->getNumNonPseudos();
+ YamlBB.Hash = BB->getHash();
if (!LBRProfile) {
YamlBB.EventCount = BB->getKnownExecutionCount();
// Include landing pads with non-zero execution count.
if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
!(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
+ // Include blocks having successors or predecessors with positive counts.
uint64_t SuccessorExecCount = 0;
for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
BB->branch_info())
SuccessorExecCount += BranchInfo.Count;
- if (!SuccessorExecCount)
+ uint64_t PredecessorExecCount = 0;
+ for (auto Pred : BB->predecessors())
+ PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
+ if (!SuccessorExecCount && !PredecessorExecCount)
continue;
}
namespace {
-unsigned getShortBranchOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode;
- case X86::JMP_2: return X86::JMP_1;
- case X86::JMP_4: return X86::JMP_1;
- case X86::JCC_2: return X86::JCC_1;
- case X86::JCC_4: return X86::JCC_1;
- }
-}
-
unsigned getShortArithOpcode(unsigned Opcode) {
return X86::getShortOpcodeArith(Opcode);
}
MCPhysReg getX86R11() const override { return X86::R11; }
+ unsigned getShortBranchOpcode(unsigned Opcode) const override {
+ switch (Opcode) {
+ default:
+ return Opcode;
+ case X86::JMP_2:
+ return X86::JMP_1;
+ case X86::JMP_4:
+ return X86::JMP_1;
+ case X86::JCC_2:
+ return X86::JCC_1;
+ case X86::JCC_4:
+ return X86::JCC_1;
+ }
+ }
+
MCPhysReg getIntArgRegister(unsigned ArgNo) const override {
// FIXME: this should depend on the calling convention.
switch (ArgNo) {