From 0810f16fb9e89e9d7809a0e9a9a7296b35f8ef27 Mon Sep 17 00:00:00 2001 From: George Rimar Date: Thu, 4 Jul 2019 14:17:31 +0000 Subject: [PATCH] [LLD][ELF] - Linkerscript: add a support for expressions for section's filling Imagine the script: .section: { ... } = FILL_EXPR LLD assumes that FILL_EXPR is a number, and does not allow it to be an expression, though that is allowed by the specification: https://sourceware.org/binutils/docs-2.32/ld/Output-Section-Fill.html This patch adds support for cases when FILL_EXPR is a simple math expression. Fixes https://bugs.llvm.org/show_bug.cgi?id=42482. Differential revision: https://reviews.llvm.org/D64130 llvm-svn: 365143 --- lld/ELF/ScriptLexer.cpp | 9 ++++--- lld/ELF/ScriptParser.cpp | 40 ++++++++++++---------------- lld/test/ELF/linkerscript/fill.test | 2 +- lld/test/ELF/linkerscript/sections-padding.s | 24 +++++++++++++++-- 4 files changed, 46 insertions(+), 29 deletions(-) diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp index cd4873c..bb530d2 100644 --- a/lld/ELF/ScriptLexer.cpp +++ b/lld/ELF/ScriptLexer.cpp @@ -170,7 +170,7 @@ bool ScriptLexer::atEOF() { return errorCount() || Tokens.size() == Pos; } // Split a given string as an expression. // This function returns "3", "*" and "5" for "3*5" for example. static std::vector tokenizeExpr(StringRef S) { - StringRef Ops = "+-*/:!~"; // List of operators + StringRef Ops = "+-*/:!~=<>"; // List of operators // Quoted strings are literal strings, so we don't want to split it. if (S.startswith("\"")) @@ -191,8 +191,11 @@ static std::vector tokenizeExpr(StringRef S) { if (E != 0) Ret.push_back(S.substr(0, E)); - // Get the operator as a token. Keep != as one token. - if (S.substr(E).startswith("!=")) { + // Get the operator as a token. + // Keep !=, ==, >=, <=, << and >> operators as single tokens. 
+ if (S.substr(E).startswith("!=") || S.substr(E).startswith("==") || + S.substr(E).startswith(">=") || S.substr(E).startswith("<=") || + S.substr(E).startswith("<<") || S.substr(E).startswith(">>")) { Ret.push_back(S.substr(E, 2)); S = S.substr(E + 2); } else { diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index ab96b8e..eb896c6 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -85,7 +85,6 @@ private: SymbolAssignment *readSymbolAssignment(StringRef Name); ByteCommand *readByteCommand(StringRef Tok); std::array readFill(); - std::array parseFill(StringRef Tok); bool readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2); void readSectionAddressType(OutputSection *Cmd); OutputSection *readOverlaySectionDescription(); @@ -726,17 +725,6 @@ Expr ScriptParser::readAssert() { }; } -// Reads a FILL(expr) command. We handle the FILL command as an -// alias for =fillexp section attribute, which is different from -// what GNU linkers do. -// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html -std::array ScriptParser::readFill() { - expect("("); - std::array V = parseFill(next()); - expect(")"); - return V; -} - // Tries to read the special directive for an output section definition which // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)" or "(OVERLAY)". // Tok1 and Tok2 are next 2 tokens peeked. See comment for readSectionAddressType below. @@ -837,6 +825,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { // by name. This is for very old file formats such as ECOFF/XCOFF. // For ELF, we should ignore. } else if (Tok == "FILL") { + // We handle the FILL command as an alias for =fillexp section attribute, + // which is different from what GNU linkers do. 
+ // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html Cmd->Filler = readFill(); } else if (Tok == "SORT") { readSort(); @@ -867,10 +858,12 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { Cmd->Phdrs = readOutputSectionPhdrs(); - if (consume("=")) - Cmd->Filler = parseFill(next()); - else if (peek().startswith("=")) - Cmd->Filler = parseFill(next().drop_front()); + if (peek() == "=" || peek().startswith("=")) { + InExpr = true; + consume("="); + Cmd->Filler = readFill(); + InExpr = false; + } // Consume optional comma following output section command. consume(","); @@ -880,20 +873,21 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { return Cmd; } -// Parses a given string as a octal/decimal/hexadecimal number and -// returns it as a big-endian number. Used for `=`. +// Reads a `=` expression and returns its value as a big-endian number. // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html +// We do not support using symbols in such expressions. // // When reading a hexstring, ld.bfd handles it as a blob of arbitrary // size, while ld.gold always handles it as a 32-bit big-endian number. // We are compatible with ld.gold because it's easier to implement. -std::array ScriptParser::parseFill(StringRef Tok) { - uint32_t V = 0; - if (!to_integer(Tok, V)) - setError("invalid filler expression: " + Tok); +std::array ScriptParser::readFill() { + uint64_t Value = readExpr()().Val; + if (Value > UINT32_MAX) + setError("filler expression result does not fit 32-bit: 0x" + + Twine::utohexstr(Value)); std::array Buf; - write32be(Buf.data(), V); + write32be(Buf.data(), (uint32_t)Value); return Buf; } diff --git a/lld/test/ELF/linkerscript/fill.test b/lld/test/ELF/linkerscript/fill.test index 5bf295e..10417bb 100644 --- a/lld/test/ELF/linkerscript/fill.test +++ b/lld/test/ELF/linkerscript/fill.test @@ -11,7 +11,7 @@ SECTIONS { . += 4; *(.bbb) . 
+= 4; - FILL(0x22222222); + FILL(0x22220000 + 0x2222); . += 4; } } diff --git a/lld/test/ELF/linkerscript/sections-padding.s b/lld/test/ELF/linkerscript/sections-padding.s index 91ced2e..7b90746 100644 --- a/lld/test/ELF/linkerscript/sections-padding.s +++ b/lld/test/ELF/linkerscript/sections-padding.s @@ -29,9 +29,9 @@ # RUN: echo "SECTIONS { .mysec : { *(.mysec*) } =0x99XX }" > %t.script # RUN: not ld.lld -o %t.out --script %t.script %t 2>&1 \ # RUN: | FileCheck --check-prefix=ERR2 %s -# ERR2: invalid filler expression: 0x99XX +# ERR2: malformed number: 0x99XX -## Check case with space between '=' and expression: +## Check case with space between '=' and a value: # RUN: echo "SECTIONS { .mysec : { *(.mysec*) } = 0x1122 }" > %t.script # RUN: ld.lld -o %t.out --script %t.script %t # RUN: llvm-objdump -s %t.out | FileCheck -check-prefix=YES %s @@ -41,6 +41,26 @@ # RUN: ld.lld -o %t.out --script %t.script %t # RUN: llvm-objdump -s %t.out | FileCheck -check-prefix=YES %s +## Check we can use an arbitrary expression as a filler. +# RUN: echo "SECTIONS { .mysec : { *(.mysec*) } = ((0x11<<8) | 0x22) }" > %t.script +# RUN: ld.lld -o %t.out --script %t.script %t +# RUN: llvm-objdump -s %t.out | FileCheck -check-prefix=YES %s + +## Check case with no space between '=' and the expression: +# RUN: echo "SECTIONS { .mysec : { *(.mysec*) } =((0x11 << 8) | 0x22) }" > %t.script +# RUN: ld.lld -o %t.out --script %t.script %t +# RUN: llvm-objdump -s %t.out | FileCheck -check-prefix=YES %s + +## Check we report an error if the expression value is larger than 32 bits. +# RUN: echo "SECTIONS { .mysec : { *(.mysec*) } =(0x11 << 32) }" > %t.script +# RUN: not ld.lld -o %t.out --script %t.script %t 2>&1 | FileCheck --check-prefix=ERR3 %s +# ERR3: filler expression result does not fit 32-bit: 0x1100000000 + +## Check we report an error if an expression uses a symbol. 
+# RUN: echo "SECTIONS { foo = 0x11; .mysec : { *(.mysec*) } = foo }" > %t.script +# RUN: not ld.lld -o %t.out %t --script %t.script 2>&1 | FileCheck --check-prefix=ERR4 %s +# ERR4: symbol not found: foo + .section .mysec.1,"a" .align 16 .byte 0x66 -- 2.7.4