From b4a8c0ebb6d49f757c687833d85f843aaeb19133 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Thu, 18 Mar 2021 15:32:29 -0700 Subject: [PATCH] [LTO][MC] Discard non-prevailing defined symbols in module-level assembly This is the alternative approach to D96931. In LTO, for each module with inlineasm block, prepend directive ".lto_discard , *" to the beginning of the inline asm. ".lto_discard" is both a module inlineasm block marker and (optionally) provides a list of symbols to be discarded. In MC while emitting for inlineasm, discard symbol binding & symbol definitions according to ".lto_disard". Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D98762 --- llvm/include/llvm/MC/MCContext.h | 1 - llvm/include/llvm/MC/MCParser/MCAsmParser.h | 2 + llvm/lib/LTO/LTO.cpp | 30 +++++++++- llvm/lib/MC/MCParser/AsmParser.cpp | 48 ++++++++++++++- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 6 ++ llvm/test/LTO/X86/inline-asm-lto-discard.ll | 87 ++++++++++++++++++++++++++++ llvm/test/LTO/X86/inline-asm-lto-discard2.ll | 29 ++++++++++ llvm/test/MC/ELF/lto-discard.s | 31 ++++++++++ 8 files changed, 231 insertions(+), 3 deletions(-) create mode 100644 llvm/test/LTO/X86/inline-asm-lto-discard.ll create mode 100644 llvm/test/LTO/X86/inline-asm-lto-discard2.ll create mode 100644 llvm/test/MC/ELF/lto-discard.s diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 106763c..f07e5a8 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -396,7 +396,6 @@ namespace llvm { void initInlineSourceManager(); SourceMgr *getInlineSourceManager() { - assert(InlineSrcMgr); return InlineSrcMgr.get(); } std::vector &getLocInfos() { return LocInfos; } diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index 02cc220..24d4ada 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -182,6 +182,8 @@ public: virtual void setParsingMSInlineAsm(bool V) = 0; virtual bool isParsingMSInlineAsm() = 0; + virtual bool discardLTOSymbol(StringRef) const { return false; } + virtual bool isParsingMasm() const { return false; } virtual bool defineMacro(StringRef Name, StringRef Value) { return true; } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 8bcb160..3cd8c78 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/LTO/LTO.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -752,6 +754,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, Skip(); std::set NonPrevailingComdats; + SmallSet NonPrevailingAsmSymbols; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; @@ -798,7 +801,14 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes:: DefaultStorageClass); } + } else if (auto *AS = Msym.dyn_cast()) { + // Collect non-prevailing symbols. + if (!Res.Prevailing) + NonPrevailingAsmSymbols.insert(AS->first); + } else { + llvm_unreachable("unknown symbol type"); } + // Common resolution: collect the maximum size/alignment over all commons. // We also record if we see an instance of a common as prevailing, so that // if none is prevailing we can ignore it later. @@ -812,11 +822,29 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, CommonRes.Align = max(*SymAlign, CommonRes.Align); CommonRes.Prevailing |= Res.Prevailing; } - } + if (!M.getComdatSymbolTable().empty()) for (GlobalValue &GV : M.global_values()) handleNonPrevailingComdat(GV, NonPrevailingComdats); + + // Prepend ".lto_discard , *" directive to each module inline asm + // block. + if (!M.getModuleInlineAsm().empty()) { + std::string NewIA = ".lto_discard"; + if (!NonPrevailingAsmSymbols.empty()) { + // Don't dicard a symbol if there is a live .symver for it. + ModuleSymbolTable::CollectAsmSymvers( + M, [&](StringRef Name, StringRef Alias) { + if (!NonPrevailingAsmSymbols.count(Alias)) + NonPrevailingAsmSymbols.erase(Name); + }); + NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", "); + } + NewIA += "\n"; + M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm()); + } + assert(MsymI == MsymE); return std::move(Mod); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 3ef51e6..261d1e9 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -168,6 +169,8 @@ private: /// List of forward directional labels for diagnosis at the end. SmallVector, 4> DirLabels; + SmallSet LTODiscardSymbols; + /// AssemblerDialect. ~OU means unset value and use value provided by MAI. unsigned AssemblerDialect = ~0U; @@ -235,6 +238,10 @@ public: } bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; } + bool discardLTOSymbol(StringRef Name) const override { + return LTODiscardSymbols.contains(Name); + } + bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl> &OpDecls, @@ -516,6 +523,7 @@ private: DK_ADDRSIG, DK_ADDRSIG_SYM, DK_PSEUDO_PROBE, + DK_LTO_DISCARD, DK_END }; @@ -682,6 +690,9 @@ private: // .pseudoprobe bool parseDirectivePseudoProbe(); + // ".lto_discard" + bool parseDirectiveLTODiscard(); + // Directives to support address-significance tables. bool parseDirectiveAddrsig(); bool parseDirectiveAddrsigSym(); @@ -892,6 +903,8 @@ bool AsmParser::enabledGenDwarfForAssembly() { } bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { + LTODiscardSymbols.clear(); + // Create the initial section, if requested. if (!NoInitialTextSection) Out.InitSections(false); @@ -1770,7 +1783,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, StringMap::const_iterator DirKindIt = DirectiveKindMap.find(IDVal.lower()); DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) - ? DK_NO_DIRECTIVE : DirKindIt->getValue(); switch (DirKind) { @@ -1868,6 +1880,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); } + if (discardLTOSymbol(IDVal)) + return false; + getTargetParser().doBeforeLabelEmit(Sym); // Emit the label. @@ -2208,6 +2223,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveAddrsigSym(); case DK_PSEUDO_PROBE: return parseDirectivePseudoProbe(); + case DK_LTO_DISCARD: + return parseDirectiveLTODiscard(); } return Error(IDLoc, "unknown directive"); @@ -2852,6 +2869,9 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef, return false; } + if (discardLTOSymbol(Name)) + return false; + // Do the assignment. Out.emitAssignment(Sym, Value); if (NoDeadStrip) @@ -4870,6 +4890,10 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { SMLoc Loc = getTok().getLoc(); if (parseIdentifier(Name)) return Error(Loc, "expected identifier"); + + if (discardLTOSymbol(Name)) + return false; + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); // Assembler local symbols don't make any sense here. Complain loudly. @@ -5493,6 +5517,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".addrsig"] = DK_ADDRSIG; DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM; DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE; + DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD; } MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -5806,6 +5831,27 @@ bool AsmParser::parseDirectivePseudoProbe() { return false; } +/// parseDirectiveLTODiscard +/// ::= ".lto_discard" [ identifier ( , identifier )* ] +/// The LTO library emits this directive to discard non-prevailing symbols. +/// We ignore symbol assignments and attribute changes for the specified +/// symbols. +bool AsmParser::parseDirectiveLTODiscard() { + auto ParseOp = [&]() -> bool { + StringRef Name; + SMLoc Loc = getTok().getLoc(); + if (parseIdentifier(Name)) + return Error(Loc, "expected identifier"); + LTODiscardSymbols.insert(Name); + return false; + }; + + LTODiscardSymbols.clear(); + if (parseMany(ParseOp)) + return addErrorSuffix(" in directive"); + return false; +} + // We are comparing pointers, but the pointers are relative to a single string. // Thus, this should always be deterministic. static int rewritesSort(const AsmRewrite *AsmRewriteA, diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 5b3f022..70d69fc 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -182,6 +182,12 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); + if (getParser().discardLTOSymbol(Name)) { + if (getLexer().is(AsmToken::EndOfStatement)) + break; + continue; + } + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitSymbolAttribute(Sym, Attr); diff --git a/llvm/test/LTO/X86/inline-asm-lto-discard.ll b/llvm/test/LTO/X86/inline-asm-lto-discard.ll new file mode 100644 index 0000000..4893eb1 --- /dev/null +++ b/llvm/test/LTO/X86/inline-asm-lto-discard.ll @@ -0,0 +1,87 @@ +; Check that non-prevailing symbols in module inline assembly are discarded +; during regular LTO otherwise the final symbol binding could be wrong. + +; RUN: split-file %s %t +; RUN: opt %t/t1.ll -o %t1 +; RUN: opt %t/t2.ll -o %t2 +; RUN: opt %t/t3.ll -o %t3 +; RUN: opt %t/t4.ll -o %t4 + +; RUN: llvm-lto2 run -o %to1 -save-temps %t1 %t2 \ +; RUN: -r %t1,foo,px \ +; RUN: -r %t2,foo, \ +; RUN: -r %t2,bar,pl +; RUN: llvm-dis < %to1.0.0.preopt.bc | FileCheck %s --check-prefix=ASM1 +; RUN: llvm-nm %to1.0 | FileCheck %s --check-prefix=SYM +; RUN: llvm-objdump -d --disassemble-symbols=foo %to1.0 \ +; RUN: | FileCheck %s --check-prefix=DEF + +; RUN: llvm-lto2 run -o %to2 -save-temps %t2 %t3 \ +; RUN: -r %t2,foo, \ +; RUN: -r %t2,bar,pl \ +; RUN: -r %t3,foo,px +; RUN: llvm-dis < %to2.0.0.preopt.bc | FileCheck %s --check-prefix=ASM2 +; RUN: llvm-nm %to2.0 | FileCheck %s --check-prefix=SYM +; RUN: llvm-objdump -d --disassemble-symbols=foo %to2.0 \ +; RUN: | FileCheck %s --check-prefix=DEF + +; Check that ".symver" is properly handled. +; RUN: llvm-lto2 run -o %to3 -save-temps %t4 \ +; RUN: -r %t4,bar, \ +; RUN: -r %t4,foo, \ +; RUN: -r %t4,foo@@VER1,px +; RUN: llvm-dis < %to3.0.0.preopt.bc | FileCheck %s --check-prefix=ASM3 + +; ASM1: module asm ".lto_discard foo" +; ASM1-NEXT: module asm ".weak foo" +; ASM1-NEXT: module asm ".equ foo,bar" + +; ASM2: module asm ".lto_discard foo" +; ASM2-NEXT: module asm ".weak foo" +; ASM2-NEXT: module asm ".equ foo,bar" +; ASM2-NEXT: module asm ".lto_discard" +; ASM2-NEXT: module asm " .global foo ; foo: leal 2(%rdi), %eax" + +; ASM3-NOT: module asm ".lto_discard foo" + +; SYM: T foo + +; DEF: leal 2(%rdi), %eax + +;--- t1.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local i32 @foo(i32 %0) { + %2 = add nsw i32 %0, 2 + ret i32 %2 +} + +;--- t2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +module asm ".weak foo" +module asm ".equ foo,bar" + +@llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @bar to i8*)], section "llvm.metadata" + +define internal i32 @bar(i32 %0) { + %2 = add nsw i32 %0, 1 + ret i32 %2 +} + +;--- t3.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +module asm " .global foo ; foo: leal 2(%rdi), %eax" + +;--- t4.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +module asm ".global foo" +module asm "foo: call bar" +module asm ".symver foo,foo@@@VER1" +module asm ".symver bar,bar@@@VER1" diff --git a/llvm/test/LTO/X86/inline-asm-lto-discard2.ll b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll new file mode 100644 index 0000000..5d111d0 --- /dev/null +++ b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll @@ -0,0 +1,29 @@ +; Check that +; 1. ".lto_discard" works as module inlineasm marker and its argument symbols +; are discarded. +; 2. there is no reassignment error in the presence of ".lto_discard" +; RUN: llc < %s | FileCheck %s + +; CHECK: .data +; CHECK-NOT: .weak foo +; CHECK-NOT: .set foo, bar +; CHECK: .globl foo +; CHECK: foo: +; CHECK: .byte 1 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +module asm ".lto_discard foo" +module asm " .text" +module asm "bar:" +module asm " .data" +module asm ".weak foo" +module asm ".set foo, bar" +module asm ".weak foo" +module asm ".set foo, bar" + +module asm ".lto_discard" +module asm ".globl foo" +module asm "foo:" +module asm " .byte 1" diff --git a/llvm/test/MC/ELF/lto-discard.s b/llvm/test/MC/ELF/lto-discard.s new file mode 100644 index 0000000..75a7d7e --- /dev/null +++ b/llvm/test/MC/ELF/lto-discard.s @@ -0,0 +1,31 @@ +// Check that ".lto_discard" ignores symbol assignments and attribute changes +// for the specified symbols. +// RUN: llvm-mc -triple x86_64 < %s | FileCheck %s + +// Check that ".lto_discard" only accepts identifiers. +// RUN: not llvm-mc -filetype=obj -triple x86_64 --defsym ERR=1 %s 2>&1 |\ +// RUN: FileCheck %s --check-prefix=ERR + +// CHECK: .weak foo +// CHECK: foo: +// CHECK: .byte 1 +// CHECK: .weak bar +// CHECK: bar: +// CHECK: .byte 2 + +.lto_discard foo +.weak foo +foo: + .byte 1 + +.lto_discard +.weak bar +bar: + .byte 2 + + +.ifdef ERR +.text +# ERR: {{.*}}.s:[[#@LINE+1]]:14: error: expected identifier in directive +.lto_discard 1 +.endif -- 2.7.4