//===----------------------------------------------------------------------===//
#include "InputFiles.h"
+#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
uint64_t pltEntryAddr) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
+ void applyJumpInstrMod(uint8_t *loc, JumpModType type,
+ unsigned size) const override;
RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
RelExpr expr) const override;
uint64_t val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
uint8_t stOther) const override;
+ bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+ InputSection *nextIS) const override;
};
} // namespace
+// This is a vector of NOP instructions of sizes from 1 to 9 bytes. The
+// appropriately sized instructions are used to fill the gaps between sections
+// that are executed when control falls through.
+static const std::vector<std::vector<uint8_t>> nopInstructions = {
+    {0x90},                                                 // nop
+    {0x66, 0x90},                                           // xchg %ax, %ax
+    {0x0f, 0x1f, 0x00},                                     // nopl (%rax)
+    {0x0f, 0x1f, 0x40, 0x00},                               // nopl 0x0(%rax)
+    {0x0f, 0x1f, 0x44, 0x00, 0x00},                         // nopl 0x0(%rax,%rax,1)
+    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},                   // nopw 0x0(%rax,%rax,1)
+    {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},             // nopl 0x0(%rax)
+    {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},       // nopl 0x0(%rax,%rax,1)
+    {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};// nopw 0x0(%rax,%rax,1)
+
X86_64::X86_64() {
copyRel = R_X86_64_COPY;
gotRel = R_X86_64_GLOB_DAT;
pltEntrySize = 16;
ipltEntrySize = 16;
trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
+ nopInstrs = nopInstructions;
// Align to the large page size (known as a superpage or huge page).
// FreeBSD automatically promotes large, superpage-aligned allocations.
int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
+// Identifiers for the different x86-64 jmp instructions. These are not the
+// raw opcodes; getJmpInsnType() maps opcode bytes to these values.
+enum JmpInsnOpcode : uint32_t {
+ J_JMP_32,
+ J_JNE_32,
+ J_JE_32,
+ J_JG_32,
+ J_JGE_32,
+ J_JB_32,
+ J_JBE_32,
+ J_JL_32,
+ J_JLE_32,
+ J_JA_32,
+ J_JAE_32,
+ J_UNKNOWN,
+};
+
+// Given the first (optional) and second byte of the insn's opcode, this
+// returns the corresponding enum value.
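+// For example, for the two-byte opcode 0x0f 0x85 (jne rel32) this returns
+// J_JNE_32; for the one-byte opcode 0xe9 (jmp rel32) it returns J_JMP_32
+// without examining first, which may therefore be null.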
+static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
+ const uint8_t *second) {
+ if (*second == 0xe9)
+ return J_JMP_32;
+
+ if (first == nullptr)
+ return J_UNKNOWN;
+
+ if (*first == 0x0f) {
+ switch (*second) {
+ case 0x84:
+ return J_JE_32;
+ case 0x85:
+ return J_JNE_32;
+ case 0x8f:
+ return J_JG_32;
+ case 0x8d:
+ return J_JGE_32;
+ case 0x82:
+ return J_JB_32;
+ case 0x86:
+ return J_JBE_32;
+ case 0x8c:
+ return J_JL_32;
+ case 0x8e:
+ return J_JLE_32;
+ case 0x87:
+ return J_JA_32;
+ case 0x83:
+ return J_JAE_32;
+ }
+ }
+ return J_UNKNOWN;
+}
+
+// Return the index of the relocation in input section IS that applies at the
+// given offset. Returns the size of the relocation vector if no such
+// relocation is found.
+static unsigned getRelocationWithOffset(const InputSection &is,
+                                        uint64_t offset) {
+  unsigned size = is.relocations.size();
+  // Iterate in reverse; the relocation for a trailing jump instruction is
+  // likely to be near the end of the vector.
+  for (unsigned i = size; i-- != 0;) {
+    if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE)
+      return i;
+  }
+  return size;
+}
+
+// Returns true if r corresponds to a relocation used for a jump instruction.
+// TODO: Once special relocations for relaxable jump instructions are available,
+// this should be modified to use those relocations.
+static bool isRelocationForJmpInsn(Relocation &r) {
+  return r.type == R_X86_64_PLT32 || r.type == R_X86_64_PC32 ||
+         r.type == R_X86_64_PC8;
+}
+
+// Return true if relocation r points to the first instruction in the
+// next section.
+// TODO: Delete this once psABI reserves a new relocation type for fall thru
+// jumps.
+static bool isFallThruRelocation(InputSection &is, InputFile *file,
+ InputSection *nextIS, Relocation &r) {
+ if (!isRelocationForJmpInsn(r))
+ return false;
+
+ uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
+ uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
+ file, r.type, r.addend, addrLoc, *r.sym, r.expr);
+
+  // If this jmp is a fall thru, the target offset is the beginning of the
+  // next section. targetOffset is the value that will be patched into the
+  // 4-byte displacement field (S + A - P, with A == -4 for these PC-relative
+  // relocations), so the CPU-computed jump target is addrLoc + 4 +
+  // targetOffset.
+ uint64_t nextSectionOffset =
+ nextIS->getOutputSection()->addr + nextIS->outSecOff;
+ return (addrLoc + 4 + targetOffset) == nextSectionOffset;
+}
+
+// Return the jmp instruction opcode that is the inverse of the given
+// opcode. For example, JE inverted is JNE.
+static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
+ switch (opcode) {
+ case J_JE_32:
+ return J_JNE_32;
+ case J_JNE_32:
+ return J_JE_32;
+ case J_JG_32:
+ return J_JLE_32;
+ case J_JGE_32:
+ return J_JL_32;
+ case J_JB_32:
+ return J_JAE_32;
+ case J_JBE_32:
+ return J_JA_32;
+ case J_JL_32:
+ return J_JGE_32;
+ case J_JLE_32:
+ return J_JG_32;
+ case J_JA_32:
+ return J_JBE_32;
+ case J_JAE_32:
+ return J_JB_32;
+ default:
+ return J_UNKNOWN;
+ }
+}
+
+// Deletes the direct jump instruction at the end of an input section if it
+// jumps to the following section, as it is not required. If the section ends
+// with a conditional jump followed by a direct jump, it checks whether they
+// can be flipped so that one can be deleted.
+// For example:
+// .section .text
+// a.BB.foo:
+// ...
+// 10: jne aa.BB.foo
+// 16: jmp bar
+// aa.BB.foo:
+// ...
+//
+// can be converted to:
+// a.BB.foo:
+// ...
+// 10: je bar #jne flipped to je and the jmp is deleted.
+// aa.BB.foo:
+// ...
+bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+ InputSection *nextIS) const {
+  const unsigned sizeOfDirectJmpInsn = 5; // jmp rel32: 0xe9 + 4-byte disp.
+
+ if (nextIS == nullptr)
+ return false;
+
+ if (is.getSize() < sizeOfDirectJmpInsn)
+ return false;
+
+ // If this jmp insn can be removed, it is the last insn and the
+ // relocation is 4 bytes before the end.
+ unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
+ if (rIndex == is.relocations.size())
+ return false;
+
+ Relocation &r = is.relocations[rIndex];
+
+ // Check if the relocation corresponds to a direct jmp.
+ const uint8_t *secContents = is.data().data();
+  // If it is not a direct jmp instruction (opcode 0xe9, jmp rel32), there is
+  // nothing to do here.
+ if (*(secContents + r.offset - 1) != 0xe9)
+ return false;
+
+ if (isFallThruRelocation(is, file, nextIS, r)) {
+ // This is a fall thru and can be deleted.
+ r.expr = R_NONE;
+ r.offset = 0;
+ is.drop_back(sizeOfDirectJmpInsn);
+ is.nopFiller = true;
+ return true;
+ }
+
+ // Now, check if flip and delete is possible.
+  const unsigned sizeOfJmpCCInsn = 6; // jcc rel32: 0x0f 0x8x + 4-byte disp.
+  // To flip, there must be at least one JmpCC and one direct jmp.
+  if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
+    return false;
+
+ unsigned rbIndex =
+ getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
+ if (rbIndex == is.relocations.size())
+    return false;
+
+ Relocation &rB = is.relocations[rbIndex];
+
+ const uint8_t *jmpInsnB = secContents + rB.offset - 1;
+ JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
+ if (jmpOpcodeB == J_UNKNOWN)
+ return false;
+
+ if (!isFallThruRelocation(is, file, nextIS, rB))
+ return false;
+
+  // The jmpCC jumps to the fall thru block, so the branch can be flipped and
+  // the jmp can be deleted.
+ JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
+ if (jInvert == J_UNKNOWN)
+ return false;
+ is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4});
+  // Move r's values to rB except the offset.
+ rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
+  // Cancel r.
+ r.expr = R_NONE;
+ r.offset = 0;
+ is.drop_back(sizeOfDirectJmpInsn);
+ is.nopFiller = true;
+ return true;
+}
+
RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
if (type == R_X86_64_GOTTPOFF)
"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
+// A JumpInstrMod at a specific offset indicates that the jump instruction
+// opcode at that offset must be modified. This is specifically used to relax
+// jump instructions with basic block sections. This function looks at the
+// JumpMod and effects the change.
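+// loc points at the last (or only) opcode byte of the jump. The rel32 jcc
+// forms use a two-byte opcode (0x0f followed by the condition byte), so for
+// size == 4 both loc[-1] and *loc are rewritten; the rel8 forms and jmp use a
+// single opcode byte. This pass currently only creates mods with size == 4
+// (see X86_64::deleteFallThruJmpInsn), but both widths are handled here.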
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
+ unsigned size) const {
+ switch (type) {
+ case J_JMP_32:
+ if (size == 4)
+ *loc = 0xe9;
+ else
+ *loc = 0xeb;
+ break;
+ case J_JE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x84;
+ } else
+ *loc = 0x74;
+ break;
+ case J_JNE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x85;
+ } else
+ *loc = 0x75;
+ break;
+ case J_JG_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8f;
+ } else
+ *loc = 0x7f;
+ break;
+ case J_JGE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8d;
+ } else
+ *loc = 0x7d;
+ break;
+ case J_JB_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x82;
+ } else
+ *loc = 0x72;
+ break;
+ case J_JBE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x86;
+ } else
+ *loc = 0x76;
+ break;
+ case J_JL_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8c;
+ } else
+ *loc = 0x7c;
+ break;
+ case J_JLE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8e;
+ } else
+ *loc = 0x7e;
+ break;
+ case J_JA_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x87;
+ } else
+ *loc = 0x77;
+ break;
+ case J_JAE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x83;
+ } else
+ *loc = 0x73;
+ break;
+ case J_UNKNOWN:
+ llvm_unreachable("Unknown Jump Relocation");
+ }
+}
+
void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
switch (rel.type) {
case R_X86_64_8:
llvm::StringRef sysroot;
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;
+ llvm::StringRef ltoBasicBlockSections;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace;
std::string rpath;
bool ltoCSProfileGenerate;
bool ltoDebugPassManager;
bool ltoNewPassManager;
+ bool ltoUniqueBBSectionNames;
bool ltoWholeProgramVisibility;
bool mergeArmExidx;
bool mipsN32Abi = false;
bool nostdlib;
bool oFormatBinary;
bool omagic;
+ bool optimizeBBJumps;
bool optRemarksWithHotness;
bool picThunk;
bool pie;
config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false);
config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common,
!args.hasArg(OPT_relocatable));
+ config->optimizeBBJumps =
+ args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
config->disableVerify = args.hasArg(OPT_disable_verify);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
+ config->ltoBasicBlockSections =
+ args.getLastArgValue(OPT_lto_basicblock_sections);
+ config->ltoUniqueBBSectionNames =
+ args.hasFlag(OPT_lto_unique_bb_section_names,
+ OPT_no_lto_unique_bb_section_names, false);
config->mapFile = args.getLastArgValue(OPT_Map);
config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
config->mergeArmExidx =
return s->getSize();
if (uncompressedSize >= 0)
return uncompressedSize;
- return rawData.size();
+ return rawData.size() - bytesDropped;
}
void InputSectionBase::uncompress() const {
}
}
-static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a,
- uint64_t p, const Symbol &sym, RelExpr expr) {
+uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
+ int64_t a, uint64_t p,
+ const Symbol &sym, RelExpr expr) {
switch (expr) {
case R_ABS:
case R_DTPREL:
if (expr == R_NONE)
continue;
+ if (expr == R_SIZE) {
+ target->relocateNoSym(bufLoc, type,
+ SignExtend64<bits>(sym.getSize() + addend));
+ continue;
+ }
+
if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) {
std::string msg = getLocation<ELFT>(offset) +
": has non-ABS relocation " + toString(type) +
const unsigned bits = config->wordsize * 8;
for (const Relocation &rel : relocations) {
+ if (rel.expr == R_NONE)
+ continue;
uint64_t offset = rel.offset;
if (auto *sec = dyn_cast<InputSection>(this))
offset += sec->outSecOff;
break;
}
}
+
+  // Apply jumpInstrMods. jumpInstrMods are created when the opcode of a jmp
+  // insn must be modified to shrink or flip the insn. This is primarily used
+  // to relax and optimize jumps created with basic block sections.
+ if (auto *sec = dyn_cast<InputSection>(this)) {
+ for (const JumpInstrMod &jumpMod : jumpInstrMods) {
+ uint64_t offset = jumpMod.offset + sec->outSecOff;
+ uint8_t *bufLoc = buf + offset;
+ target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size);
+ }
+ }
}
// For each function-defining prologue, find any calls to __morestack,
return cast_or_null<ObjFile<ELFT>>(file);
}
+  // If basic block sections are enabled, many code sections could end up with
+  // one or two jump instructions at the end that could be relaxed to a smaller
+  // instruction. The members below help trim the trailing jump instruction and
+  // shrink a section.
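+  // For example, X86_64::deleteFallThruJmpInsn() calls drop_back(5) to shed a
+  // trailing 5-byte direct jmp, and the writer calls trim() once addresses
+  // are final to actually shrink rawData.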
+ unsigned bytesDropped = 0;
+
+  // Mark the last num bytes of the section as dropped.
+  void drop_back(uint64_t num) { bytesDropped += num; }
+
+  // Undo a previous drop_back().
+  void push_back(uint64_t num) {
+    assert(bytesDropped >= num);
+    bytesDropped -= num;
+  }
+
+ void trim() {
+ if (bytesDropped) {
+ rawData = rawData.drop_back(bytesDropped);
+ bytesDropped = 0;
+ }
+ }
+
ArrayRef<uint8_t> data() const {
if (uncompressedSize >= 0)
uncompress();
// the mmap'ed output buffer.
template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd);
void relocateAlloc(uint8_t *buf, uint8_t *bufEnd);
+  static uint64_t getRelocTargetVA(const InputFile *file, RelType type,
+                                   int64_t a, uint64_t p, const Symbol &sym,
+                                   RelExpr expr);
// The native ELF reloc data type is not very convenient to handle.
// So we convert ELF reloc records to our own records in Relocations.cpp.
// This vector contains such "cooked" relocations.
std::vector<Relocation> relocations;
+ // Indicates that this section needs to be padded with a NOP filler if set to
+ // true.
+ bool nopFiller = false;
+
+  // These are modifiers to jump instructions that are necessary when basic
+  // block sections are enabled. Enabling basic block sections creates
+  // opportunities to relax jump instructions at basic block boundaries after
+  // reordering the basic blocks.
+ std::vector<JumpInstrMod> jumpInstrMods;
+
// A function compiled with -fsplit-stack calling a function
// compiled without -fsplit-stack needs its prologue adjusted. Find
// such functions and adjust their prologues. This is very similar
c.Options.FunctionSections = true;
c.Options.DataSections = true;
+  // Check if basic block sections must be used.
+  // Allowed values for --lto-basicblock-sections are "all", "labels",
+  // "<file name specifying basic block ids>", or "none". This is the
+  // equivalent of the -fbasicblock-sections= flag in clang.
+ if (!config->ltoBasicBlockSections.empty()) {
+ if (config->ltoBasicBlockSections == "all") {
+ c.Options.BBSections = BasicBlockSection::All;
+ } else if (config->ltoBasicBlockSections == "labels") {
+ c.Options.BBSections = BasicBlockSection::Labels;
+ } else if (config->ltoBasicBlockSections == "none") {
+ c.Options.BBSections = BasicBlockSection::None;
+ } else {
+      ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+          MemoryBuffer::getFile(config->ltoBasicBlockSections.str());
+      if (!mbOrErr) {
+        error("cannot open " + config->ltoBasicBlockSections + ": " +
+              mbOrErr.getError().message());
+      } else {
+        c.Options.BBSectionsFuncListBuf = std::move(*mbOrErr);
+      }
+ }
+ c.Options.BBSections = BasicBlockSection::List;
+ }
+ }
+
+ c.Options.UniqueBBSectionNames = config->ltoUniqueBBSectionNames;
+
if (auto relocModel = getRelocModelFromCMModel())
c.RelocModel = *relocModel;
else if (config->relocatable)
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
+defm optimize_bb_jumps: B<"optimize-bb-jumps",
+  "Remove direct jumps at the end of a basic block when its target is the next basic block",
+  "Do not remove any direct jumps at the end of a basic block (default)">;
+
defm split_stack_adjust_size
: Eq<"split-stack-adjust-size",
"Specify adjustment to stack size when a split-stack function calls a "
HelpText<"The format used for serializing remarks (default: YAML)">;
defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">;
def save_temps: F<"save-temps">;
+def lto_basicblock_sections: J<"lto-basicblock-sections=">,
+ HelpText<"Enable basic block sections for LTO">;
+defm lto_unique_bb_section_names: B<"lto-unique-bb-section-names",
+ "Give unique names to every basic block section for LTO",
+ "Do not give unique names to every basic block section for LTO (default)">;
def shuffle_sections: J<"shuffle-sections=">, MetaVarName<"<seed>">,
HelpText<"Shuffle input sections using the given seed. If 0, use a random seed">;
def thinlto_cache_dir: J<"thinlto-cache-dir=">,
sortByOrder(isd->sections, order);
}
+// Fill [buf, buf + size) with NOP instructions, using the largest NOP
+// encoding repeatedly and one appropriately sized NOP for the remainder.
+static void nopInstrFill(uint8_t *buf, size_t size) {
+  if (size == 0)
+    return;
+  unsigned i = 0;
+  const std::vector<std::vector<uint8_t>> &nopFiller = *target->nopInstrs;
+  unsigned num = size / nopFiller.back().size();
+  for (unsigned c = 0; c < num; ++c) {
+    memcpy(buf + i, nopFiller.back().data(), nopFiller.back().size());
+    i += nopFiller.back().size();
+  }
+  unsigned remaining = size - i;
+  if (!remaining)
+    return;
+  // nopFiller[n - 1] is the n-byte NOP, so the remainder is filled exactly.
+  assert(nopFiller[remaining - 1].size() == remaining);
+  memcpy(buf + i, nopFiller[remaining - 1].data(), remaining);
+}
+
// Fill [Buf, Buf + Size) with Filler.
// This is used for linker script "=fillexp" command.
static void fill(uint8_t *buf, size_t size,
end = buf + size;
else
end = buf + sections[i + 1]->outSecOff;
- fill(start, end - start, filler);
+ if (isec->nopFiller) {
+ assert(target->nopInstrs);
+ nopInstrFill(start, end - start);
+ } else
+ fill(start, end - start, filler);
}
});
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
using RelType = uint32_t;
+using JumpModType = uint32_t;
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
Symbol *sym;
};
+// Manipulate jump instructions with these modifiers. These are used to relax
+// jump instruction opcodes at basic block boundaries and are particularly
+// useful when basic block sections are enabled.
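+// For example, flipping the jne in a trailing "jne ...; jmp target" sequence
+// pushes {J_JE_32, offsetOfConditionByte, 4} so that the writer later
+// rewrites the jne into "je target" (see X86_64::deleteFallThruJmpInsn).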
+struct JumpInstrMod {
+  // The jump instruction kind whose opcode is written in place.
+  JumpModType original;
+  // Offset of the last (or only) opcode byte within the section.
+  uint64_t offset;
+  // Width of the jump displacement in bytes.
+  unsigned size;
+};
+
// This function writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val);
}
+  virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
+                                 unsigned size) const {}
+
virtual ~TargetInfo();
+ // This deletes a jump insn at the end of the section if it is a fall thru to
+ // the next section. Further, if there is a conditional jump and a direct
+ // jump consecutively, it tries to flip the conditional jump to convert the
+ // direct jump into a fall thru and delete it. Returns true if a jump
+ // instruction can be deleted.
+ virtual bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+ InputSection *nextIS) const {
+ return false;
+ }
+
unsigned defaultCommonPageSize = 4096;
unsigned defaultMaxPageSize = 4096;
// executable OutputSections.
std::array<uint8_t, 4> trapInstr;
+ // Stores the NOP instructions of different sizes for the target and is used
+ // to pad sections that are relaxed.
+ llvm::Optional<std::vector<std::vector<uint8_t>>> nopInstrs;
+
// If a target needs to rewrite calls to __morestack to instead call
// __morestack_non_split when a split-stack enabled caller calls a
// non-split-stack callee this will return true. Otherwise returns false.
#include "llvm/Support/xxhash.h"
#include <climits>
+#define DEBUG_TYPE "lld"
+
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
void sortSections();
void resolveShfLinkOrder();
void finalizeAddressDependentContent();
+ void optimizeBasicBlockJumps();
void sortInputSections();
void finalizeSections();
void checkExecuteOnly();
Twine(os->alignment) + ")");
}
+// If input sections have been shrunk (basic block sections) then update
+// symbol values and sizes associated with these sections. With basic block
+// sections, input sections can shrink when the jump instructions at the end
+// of the section are relaxed.
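+//
+// For example, if a 16-byte section dropped its trailing 5-byte jmp: a symbol
+// at offset 16 (the old end) lies past the new size of 11, so its value is
+// moved back to 11; a function symbol at offset 0 with size 16 would now
+// overrun the section, so its size shrinks to 11.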
+static void fixSymbolsAfterShrinking() {
+  for (InputFile *file : objectFiles) {
+    parallelForEach(file->getSymbols(), [&](Symbol *sym) {
+      auto *def = dyn_cast<Defined>(sym);
+      if (!def)
+        return;
+
+      const SectionBase *sec = def->section;
+      if (!sec)
+        return;
+
+      const InputSectionBase *inputSec = dyn_cast<InputSectionBase>(sec->repl);
+      if (!inputSec || !inputSec->bytesDropped)
+        return;
+
+      const size_t oldSize = inputSec->data().size();
+      const size_t newSize = oldSize - inputSec->bytesDropped;
+
+      if (def->value > newSize && def->value <= oldSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Moving symbol " << sym->getName() << " from "
+                   << def->value << " to "
+                   << def->value - inputSec->bytesDropped << " bytes\n");
+        def->value -= inputSec->bytesDropped;
+        return;
+      }
+
+      if (def->value + def->size > newSize && def->value <= oldSize &&
+          def->value + def->size <= oldSize) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Shrinking symbol " << sym->getName() << " from "
+                   << def->size << " to " << def->size - inputSec->bytesDropped
+                   << " bytes\n");
+        def->size -= inputSec->bytesDropped;
+      }
+    });
+  }
+}
+
+// If basic block sections exist, there are opportunities to delete fall thru
+// jumps and shrink jump instructions after basic block reordering. This
+// relaxation pass does that. It is only enabled when the --optimize-bb-jumps
+// option is used.
+template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() {
+ assert(config->optimizeBBJumps);
+
+ script->assignAddresses();
+ // For every output section that has executable input sections, this
+ // does the following:
+  // 1. Deletes all direct jump instructions in input sections that
+  //    jump to the following section, as they are not required.
+ // 2. If there are two consecutive jump instructions, it checks
+ // if they can be flipped and one can be deleted.
+ for (OutputSection *os : outputSections) {
+ if (!(os->flags & SHF_EXECINSTR))
+ continue;
+ std::vector<InputSection *> sections = getInputSections(os);
+ std::vector<unsigned> result(sections.size());
+ // Delete all fall through jump instructions. Also, check if two
+ // consecutive jump instructions can be flipped so that a fall
+ // through jmp instruction can be deleted.
+ parallelForEachN(0, sections.size(), [&](size_t i) {
+ InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr;
+ InputSection &is = *sections[i];
+ result[i] =
+ target->deleteFallThruJmpInsn(is, is.getFile<ELFT>(), next) ? 1 : 0;
+ });
+ size_t numDeleted = std::count(result.begin(), result.end(), 1);
+ if (numDeleted > 0) {
+ script->assignAddresses();
+ LLVM_DEBUG(llvm::dbgs()
+ << "Removing " << numDeleted << " fall through jumps\n");
+ }
+ }
+
+ fixSymbolsAfterShrinking();
+
+ for (OutputSection *os : outputSections) {
+ std::vector<InputSection *> sections = getInputSections(os);
+ for (InputSection *is : sections)
+ is->trim();
+ }
+}
+
static void finalizeSynthetic(SyntheticSection *sec) {
if (sec && sec->isNeeded() && sec->getParent())
sec->finalizeContents();
finalizeSynthetic(in.symTab);
finalizeSynthetic(in.ppc64LongBranchTarget);
+ // Relaxation to delete inter-basic block jumps created by basic block
+ // sections. Run after in.symTab is finalized as optimizeBasicBlockJumps
+ // can relax jump instructions based on symbol offset.
+ if (config->optimizeBBJumps)
+ optimizeBasicBlockJumps();
+
// Fill other section headers. The dynamic table is finalized
// at the end because some tags like RELSZ depend on result
// of finalizing other sections.
--- /dev/null
+# REQUIRES: x86
+## basicblock-sections tests.
+## This simple test checks that bar is folded into foo with bb sections
+## and that the jumps are deleted.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: ld.lld --optimize-bb-jumps --icf=all %t.o -o %t.out
+# RUN: llvm-objdump -d %t.out | FileCheck %s
+
+# CHECK: <foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: je 0x{{[[:xdigit:]]+}} <aa.BB.foo>
+# CHECK-NOT: jmp
+
+# CHECK: <a.BB.foo>:
+## Explicitly check that bar is folded and not emitted.
+# CHECK-NOT: <bar>:
+# CHECK-NOT: <a.BB.bar>:
+# CHECK-NOT: <aa.BB.bar>:
+
+.section .text.bar,"ax",@progbits
+.type bar,@function
+bar:
+ nopl (%rax)
+ jne a.BB.bar
+ jmp aa.BB.bar
+
+.section .text.a.BB.bar,"ax",@progbits,unique,3
+a.BB.bar:
+ nopl (%rax)
+
+aa.BB.bar:
+ ret
+
+.section .text.foo,"ax",@progbits
+.type foo,@function
+foo:
+ nopl (%rax)
+ jne a.BB.foo
+ jmp aa.BB.foo
+
+.section .text.a.BB.foo,"ax",@progbits,unique,2
+a.BB.foo:
+ nopl (%rax)
+
+aa.BB.foo:
+ ret
--- /dev/null
+# REQUIRES: x86
+## basicblock-sections tests.
+## This simple test checks if redundant direct jumps are converted to
+## implicit fallthrus. The jcc's must be converted to their inverted
+## opcode, for instance jne to je, and the jmp must be deleted.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: ld.lld --optimize-bb-jumps %t.o -o %t.out
+# RUN: llvm-objdump -d %t.out | FileCheck %s
+
+# CHECK: <foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jne 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+
+
+.section .text,"ax",@progbits
+.type foo,@function
+foo:
+ nopl (%rax)
+ je a.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <a.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: je 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+
+.section .text,"ax",@progbits,unique,3
+a.BB.foo:
+ nopl (%rax)
+ jne aa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jle 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,4
+aa.BB.foo:
+ nopl (%rax)
+ jg aaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jl 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,5
+aaa.BB.foo:
+ nopl (%rax)
+ jge aaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jae 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,6
+aaaa.BB.foo:
+ nopl (%rax)
+ jb aaaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: ja 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,7
+aaaaa.BB.foo:
+ nopl (%rax)
+ jbe aaaaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jge 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,8
+aaaaaa.BB.foo:
+ nopl (%rax)
+ jl aaaaaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaaaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jg 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,9
+aaaaaaa.BB.foo:
+ nopl (%rax)
+ jle aaaaaaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaaaaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jbe 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,10
+aaaaaaaa.BB.foo:
+ nopl (%rax)
+ ja aaaaaaaaa.BB.foo
+ jmp r.BB.foo
+
+# CHECK: <aaaaaaaaa.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jb 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+#
+.section .text,"ax",@progbits,unique,11
+aaaaaaaaa.BB.foo:
+ nopl (%rax)
+ jae aaaaaaaaaa.BB.foo
+ jmp r.BB.foo
+
+.section .text,"ax",@progbits,unique,20
+aaaaaaaaaa.BB.foo:
+ nopl (%rax)
+
+r.BB.foo:
+ ret
--- /dev/null
+# REQUIRES: x86
+## basicblock-sections tests.
+## This simple test checks if redundant direct jumps are converted to
+## implicit fallthrus when PC32 reloc is present. The jcc's must be converted
+## to their inverted opcode, for instance jne to je, and the jmp must be
+## deleted.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: llvm-objdump -dr %t.o | FileCheck %s --check-prefix=RELOC
+# RUN: ld.lld --optimize-bb-jumps %t.o -o %t.out
+# RUN: llvm-objdump -d %t.out | FileCheck %s
+
+# RELOC: jmp
+# RELOC-NEXT: R_X86_64_PC32
+
+# CHECK: <foo>:
+# CHECK-NEXT: nopl (%rax)
+# CHECK-NEXT: jne 0x{{[[:xdigit:]]+}} <r.BB.foo>
+# CHECK-NOT: jmp
+
+
+.section .text,"ax",@progbits
+.type foo,@function
+foo:
+ nopl (%rax)
+ je a.BB.foo
+# Encode a "jmp r.BB.foo" insn manually so that it gets a PC32 reloc rather
+# than a PLT32 reloc.
+ .byte 0xe9
+ .long r.BB.foo - . - 4
+
+# CHECK: <a.BB.foo>:
+# CHECK-NEXT: nopl (%rax)
+
+.section .text,"ax",@progbits,unique,3
+a.BB.foo:
+ nopl (%rax)
+r.BB.foo:
+ ret