gold/script.cc

   1 // script.cc -- handle linker scripts for gold.
   2
   3 // Copyright 2006, 2007 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #include "gold.h"
  24
  25 #include <string>
  26 #include <vector>
  27 #include <cstdio>
  28 #include <cstdlib>
  29 #include "filenames.h"
  30
  31 #include "dirsearch.h"
  32 #include "options.h"
  33 #include "fileread.h"
  34 #include "workqueue.h"
  35 #include "readsyms.h"
  36 #include "parameters.h"
  37 #include "yyscript.h"
  38 #include "script.h"
  39 #include "script-c.h"
  40
  41 namespace gold
  42 {
  43
  44 // A token read from a script file.  We don't implement keywords here;
  45 // all keywords are simply represented as a string.
  46
  47 class Token
  48 {
  49  public:
  50   // Token classification.
  51   enum Classification
  52   {
  53     // Token is invalid.
  54     TOKEN_INVALID,
  55     // Token indicates end of input.
  56     TOKEN_EOF,
  57     // Token is a string of characters.
  58     TOKEN_STRING,
  59     // Token is an operator.
  60     TOKEN_OPERATOR,
  61     // Token is a number (an integer).
  62     TOKEN_INTEGER
  63   };
  64
  65   // We need an empty constructor so that we can put this STL objects.
  66   Token()
  67     : classification_(TOKEN_INVALID), value_(), opcode_(0),
  68       lineno_(0), charpos_(0)
  69   { }
  70
  71   // A general token with no value.
  72   Token(Classification classification, int lineno, int charpos)
  73     : classification_(classification), value_(), opcode_(0),
  74       lineno_(lineno), charpos_(charpos)
  75   {
  76     gold_assert(classification == TOKEN_INVALID
  77                 || classification == TOKEN_EOF);
  78   }
  79
  80   // A general token with a value.
  81   Token(Classification classification, const std::string& value,
  82         int lineno, int charpos)
  83     : classification_(classification), value_(value), opcode_(0),
  84       lineno_(lineno), charpos_(charpos)
  85   {
  86     gold_assert(classification != TOKEN_INVALID
  87                 && classification != TOKEN_EOF);
  88   }
  89
  90   // A token representing a string of characters.
  91   Token(const std::string& s, int lineno, int charpos)
  92     : classification_(TOKEN_STRING), value_(s), opcode_(0),
  93       lineno_(lineno), charpos_(charpos)
  94   { }
  95
  96   // A token representing an operator.
  97   Token(int opcode, int lineno, int charpos)
  98     : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
  99       lineno_(lineno), charpos_(charpos)
 100   { }
 101
 102   // Return whether the token is invalid.
 103   bool
 104   is_invalid() const
 105   { return this->classification_ == TOKEN_INVALID; }
 106
 107   // Return whether this is an EOF token.
 108   bool
 109   is_eof() const
 110   { return this->classification_ == TOKEN_EOF; }
 111
 112   // Return the token classification.
 113   Classification
 114   classification() const
 115   { return this->classification_; }
 116
 117   // Return the line number at which the token starts.
 118   int
 119   lineno() const
 120   { return this->lineno_; }
 121
 122   // Return the character position at this the token starts.
 123   int
 124   charpos() const
 125   { return this->charpos_; }
 126
 127   // Get the value of a token.
 128
 129   const std::string&
 130   string_value() const
 131   {
 132     gold_assert(this->classification_ == TOKEN_STRING);
 133     return this->value_;
 134   }
 135
 136   int
 137   operator_value() const
 138   {
 139     gold_assert(this->classification_ == TOKEN_OPERATOR);
 140     return this->opcode_;
 141   }
 142
 143   int64_t
 144   integer_value() const
 145   {
 146     gold_assert(this->classification_ == TOKEN_INTEGER);
 147     return strtoll(this->value_.c_str(), NULL, 0);
 148   }
 149
 150  private:
 151   // The token classification.
 152   Classification classification_;
 153   // The token value, for TOKEN_STRING or TOKEN_INTEGER.
 154   std::string value_;
 155   // The token value, for TOKEN_OPERATOR.
 156   int opcode_;
 157   // The line number where this token started (one based).
 158   int lineno_;
 159   // The character position within the line where this token started
 160   // (one based).
 161   int charpos_;
 162 };
 163
 164 // This class handles lexing a file into a sequence of tokens.  We
 165 // don't expect linker scripts to be large, so we just read them and
 166 // tokenize them all at once.
 167
 168 class Lex
 169 {
 170  public:
 171   Lex(Input_file* input_file)
 172     : input_file_(input_file), tokens_()
 173   { }
 174
 175   // Tokenize the file.  Return the final token, which will be either
 176   // an invalid token or an EOF token.  An invalid token indicates
 177   // that tokenization failed.
 178   Token
 179   tokenize();
 180
 181   // A token sequence.
 182   typedef std::vector<Token> Token_sequence;
 183
 184   // Return the tokens.
 185   const Token_sequence&
 186   tokens() const
 187   { return this->tokens_; }
 188
 189  private:
 190   Lex(const Lex&);
 191   Lex& operator=(const Lex&);
 192
 193   // Read the file into a string buffer.
 194   void
 195   read_file(std::string*);
 196
 197   // Make a general token with no value at the current location.
 198   Token
 199   make_token(Token::Classification c, const char* p) const
 200   { return Token(c, this->lineno_, p - this->linestart_ + 1); }
 201
 202   // Make a general token with a value at the current location.
 203   Token
 204   make_token(Token::Classification c, const std::string& v, const char* p)
 205     const
 206   { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
 207
 208   // Make an operator token at the current location.
 209   Token
 210   make_token(int opcode, const char* p) const
 211   { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
 212
 213   // Make an invalid token at the current location.
 214   Token
 215   make_invalid_token(const char* p)
 216   { return this->make_token(Token::TOKEN_INVALID, p); }
 217
 218   // Make an EOF token at the current location.
 219   Token
 220   make_eof_token(const char* p)
 221   { return this->make_token(Token::TOKEN_EOF, p); }
 222
 223   // Return whether C can be the first character in a name.  C2 is the
 224   // next character, since we sometimes need that.
 225   static inline bool
 226   can_start_name(char c, char c2);
 227
 228   // Return whether C can appear in a name which has already started.
 229   static inline bool
 230   can_continue_name(char c);
 231
 232   // Return whether C, C2, C3 can start a hex number.
 233   static inline bool
 234   can_start_hex(char c, char c2, char c3);
 235
 236   // Return whether C can appear in a hex number.
 237   static inline bool
 238   can_continue_hex(char c);
 239
 240   // Return whether C can start a non-hex number.
 241   static inline bool
 242   can_start_number(char c);
 243
 244   // Return whether C can appear in a non-hex number.
 245   static inline bool
 246   can_continue_number(char c)
 247   { return Lex::can_start_number(c); }
 248
 249   // If C1 C2 C3 form a valid three character operator, return the
 250   // opcode.  Otherwise return 0.
 251   static inline int
 252   three_char_operator(char c1, char c2, char c3);
 253
 254   // If C1 C2 form a valid two character operator, return the opcode.
 255   // Otherwise return 0.
 256   static inline int
 257   two_char_operator(char c1, char c2);
 258
 259   // If C1 is a valid one character operator, return the opcode.
 260   // Otherwise return 0.
 261   static inline int
 262   one_char_operator(char c1);
 263
 264   // Read the next token.
 265   Token
 266   get_token(const char**);
 267
 268   // Skip a C style /* */ comment.  Return false if the comment did
 269   // not end.
 270   bool
 271   skip_c_comment(const char**);
 272
 273   // Skip a line # comment.  Return false if there was no newline.
 274   bool
 275   skip_line_comment(const char**);
 276
 277   // Build a token CLASSIFICATION from all characters that match
 278   // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
 279   // MATCH.  Set *PP to the character following the token.
 280   inline Token
 281   gather_token(Token::Classification, bool (*can_continue_fn)(char),
 282                const char* start, const char* match, const char** pp);
 283
 284   // Build a token from a quoted string.
 285   Token
 286   gather_quoted_string(const char** pp);
 287
 288   // The file we are reading.
 289   Input_file* input_file_;
 290   // The token sequence we create.
 291   Token_sequence tokens_;
 292   // The current line number.
 293   int lineno_;
 294   // The start of the current line in the buffer.
 295   const char* linestart_;
 296 };
 297
 298 // Read the whole file into memory.  We don't expect linker scripts to
 299 // be large, so we just use a std::string as a buffer.  We ignore the
 300 // data we've already read, so that we read aligned buffers.
 301
 302 void
 303 Lex::read_file(std::string* contents)
 304 {
 305   off_t filesize = this->input_file_->file().filesize();
 306   contents->clear();
 307   contents->reserve(filesize);
 308
 309   off_t off = 0;
 310   unsigned char buf[BUFSIZ];
 311   while (off < filesize)
 312     {
 313       off_t get = BUFSIZ;
 314       if (get > filesize - off)
 315         get = filesize - off;
 316       this->input_file_->file().read(off, get, buf);
 317       contents->append(reinterpret_cast<char*>(&buf[0]), get);
 318       off += get;
 319     }
 320 }
 321
 322 // Return whether C can be the start of a name, if the next character
 323 // is C2.  A name can being with a letter, underscore, period, or
 324 // dollar sign.  Because a name can be a file name, we also permit
 325 // forward slash, backslash, and tilde.  Tilde is the tricky case
 326 // here; GNU ld also uses it as a bitwise not operator.  It is only
 327 // recognized as the operator if it is not immediately followed by
 328 // some character which can appear in a symbol.  That is, "~0" is a
 329 // symbol name, and "~ 0" is an expression using bitwise not.  We are
 330 // compatible.
 331
 332 inline bool
 333 Lex::can_start_name(char c, char c2)
 334 {
 335   switch (c)
 336     {
 337     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 338     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 339     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 340     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 341     case 'Y': case 'Z':
 342     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 343     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 344     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 345     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 346     case 'y': case 'z':
 347     case '_': case '.': case '$': case '/': case '\\':
 348       return true;
 349
 350     case '~':
 351       return can_continue_name(c2);
 352
 353     default:
 354       return false;
 355     }
 356 }
 357
 358 // Return whether C can continue a name which has already started.
 359 // Subsequent characters in a name are the same as the leading
 360 // characters, plus digits and "=+-:[],?*".  So in general the linker
 361 // script language requires spaces around operators.
 362
 363 inline bool
 364 Lex::can_continue_name(char c)
 365 {
 366   switch (c)
 367     {
 368     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 369     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 370     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 371     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 372     case 'Y': case 'Z':
 373     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 374     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 375     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 376     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 377     case 'y': case 'z':
 378     case '_': case '.': case '$': case '/': case '\\':
 379     case '~':
 380     case '0': case '1': case '2': case '3': case '4':
 381     case '5': case '6': case '7': case '8': case '9':
 382     case '=': case '+': case '-': case ':': case '[': case ']':
 383     case ',': case '?': case '*':
 384       return true;
 385
 386     default:
 387       return false;
 388     }
 389 }
 390
// For a number we accept 0x followed by hex digits, or any sequence
// of digits.  The old linker accepts leading '$' for hex, and
// trailing HXBOD.  Those are for MRI compatibility and we don't
// accept them.  The old linker also accepts trailing M or K for mega
// or kilo.  Those are mentioned in the documentation and are meant
// to be accepted, but note that the character tests below gather
// only digits into an integer token, so such a suffix does not
// become part of the number.
 397
 398 // Return whether C1 C2 C3 can start a hex number.
 399
 400 inline bool
 401 Lex::can_start_hex(char c1, char c2, char c3)
 402 {
 403   if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
 404     return Lex::can_continue_hex(c3);
 405   return false;
 406 }
 407
 408 // Return whether C can appear in a hex number.
 409
 410 inline bool
 411 Lex::can_continue_hex(char c)
 412 {
 413   switch (c)
 414     {
 415     case '0': case '1': case '2': case '3': case '4':
 416     case '5': case '6': case '7': case '8': case '9':
 417     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 418     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 419       return true;
 420
 421     default:
 422       return false;
 423     }
 424 }
 425
 426 // Return whether C can start a non-hex number.
 427
 428 inline bool
 429 Lex::can_start_number(char c)
 430 {
 431   switch (c)
 432     {
 433     case '0': case '1': case '2': case '3': case '4':
 434     case '5': case '6': case '7': case '8': case '9':
 435       return true;
 436
 437     default:
 438       return false;
 439     }
 440 }
 441
 442 // If C1 C2 C3 form a valid three character operator, return the
 443 // opcode (defined in the yyscript.h file generated from yyscript.y).
 444 // Otherwise return 0.
 445
 446 inline int
 447 Lex::three_char_operator(char c1, char c2, char c3)
 448 {
 449   switch (c1)
 450     {
 451     case '<':
 452       if (c2 == '<' && c3 == '=')
 453         return LSHIFTEQ;
 454       break;
 455     case '>':
 456       if (c2 == '>' && c3 == '=')
 457         return RSHIFTEQ;
 458       break;
 459     default:
 460       break;
 461     }
 462   return 0;
 463 }
 464
 465 // If C1 C2 form a valid two character operator, return the opcode
 466 // (defined in the yyscript.h file generated from yyscript.y).
 467 // Otherwise return 0.
 468
 469 inline int
 470 Lex::two_char_operator(char c1, char c2)
 471 {
 472   switch (c1)
 473     {
 474     case '=':
 475       if (c2 == '=')
 476         return EQ;
 477       break;
 478     case '!':
 479       if (c2 == '=')
 480         return NE;
 481       break;
 482     case '+':
 483       if (c2 == '=')
 484         return PLUSEQ;
 485       break;
 486     case '-':
 487       if (c2 == '=')
 488         return MINUSEQ;
 489       break;
 490     case '*':
 491       if (c2 == '=')
 492         return MULTEQ;
 493       break;
 494     case '/':
 495       if (c2 == '=')
 496         return DIVEQ;
 497       break;
 498     case '|':
 499       if (c2 == '=')
 500         return OREQ;
 501       if (c2 == '|')
 502         return OROR;
 503       break;
 504     case '&':
 505       if (c2 == '=')
 506         return ANDEQ;
 507       if (c2 == '&')
 508         return ANDAND;
 509       break;
 510     case '>':
 511       if (c2 == '=')
 512         return GE;
 513       if (c2 == '>')
 514         return RSHIFT;
 515       break;
 516     case '<':
 517       if (c2 == '=')
 518         return LE;
 519       if (c2 == '<')
 520         return LSHIFT;
 521       break;
 522     default:
 523       break;
 524     }
 525   return 0;
 526 }
 527
 528 // If C1 is a valid operator, return the opcode.  Otherwise return 0.
 529
 530 inline int
 531 Lex::one_char_operator(char c1)
 532 {
 533   switch (c1)
 534     {
 535     case '+':
 536     case '-':
 537     case '*':
 538     case '/':
 539     case '%':
 540     case '!':
 541     case '&':
 542     case '|':
 543     case '^':
 544     case '~':
 545     case '<':
 546     case '>':
 547     case '=':
 548     case '?':
 549     case ',':
 550     case '(':
 551     case ')':
 552     case '{':
 553     case '}':
 554     case '[':
 555     case ']':
 556     case ':':
 557     case ';':
 558       return c1;
 559     default:
 560       return 0;
 561     }
 562 }
 563
 564 // Skip a C style comment.  *PP points to just after the "/*".  Return
 565 // false if the comment did not end.
 566
 567 bool
 568 Lex::skip_c_comment(const char** pp)
 569 {
 570   const char* p = *pp;
 571   while (p[0] != '*' || p[1] != '/')
 572     {
 573       if (*p == '\0')
 574         {
 575           *pp = p;
 576           return false;
 577         }
 578
 579       if (*p == '\n')
 580         {
 581           ++this->lineno_;
 582           this->linestart_ = p + 1;
 583         }
 584       ++p;
 585     }
 586
 587   *pp = p + 2;
 588   return true;
 589 }
 590
 591 // Skip a line # comment.  Return false if there was no newline.
 592
 593 bool
 594 Lex::skip_line_comment(const char** pp)
 595 {
 596   const char* p = *pp;
 597   size_t skip = strcspn(p, "\n");
 598   if (p[skip] == '\0')
 599     {
 600       *pp = p + skip;
 601       return false;
 602     }
 603
 604   p += skip + 1;
 605   ++this->lineno_;
 606   this->linestart_ = p;
 607   *pp = p;
 608
 609   return true;
 610 }
 611
 612 // Build a token CLASSIFICATION from all characters that match
 613 // CAN_CONTINUE_FN.  Update *PP.
 614
 615 inline Token
 616 Lex::gather_token(Token::Classification classification,
 617                   bool (*can_continue_fn)(char),
 618                   const char* start,
 619                   const char* match,
 620                   const char **pp)
 621 {
 622   while ((*can_continue_fn)(*match))
 623     ++match;
 624   *pp = match;
 625   return this->make_token(classification,
 626                           std::string(start, match - start),
 627                           start);
 628 }
 629
 630 // Build a token from a quoted string.
 631
 632 Token
 633 Lex::gather_quoted_string(const char** pp)
 634 {
 635   const char* start = *pp;
 636   const char* p = start;
 637   ++p;
 638   size_t skip = strcspn(p, "\"\n");
 639   if (p[skip] != '"')
 640     return this->make_invalid_token(start);
 641   *pp = p + skip + 1;
 642   return this->make_token(Token::TOKEN_STRING,
 643                           std::string(p, skip),
 644                           start);
 645 }
 646
 647 // Return the next token at *PP.  Update *PP.  General guideline: we
 648 // require linker scripts to be simple ASCII.  No unicode linker
 649 // scripts.  In particular we can assume that any '\0' is the end of
 650 // the input.
 651
 652 Token
 653 Lex::get_token(const char** pp)
 654 {
 655   const char* p = *pp;
 656
 657   while (true)
 658     {
 659       if (*p == '\0')
 660         {
 661           *pp = p;
 662           return this->make_eof_token(p);
 663         }
 664
 665       // Skip whitespace quickly.
 666       while (*p == ' ' || *p == '\t')
 667         ++p;
 668
 669       if (*p == '\n')
 670         {
 671           ++p;
 672           ++this->lineno_;
 673           this->linestart_ = p;
 674           continue;
 675         }
 676
 677       // Skip C style comments.
 678       if (p[0] == '/' && p[1] == '*')
 679         {
 680           int lineno = this->lineno_;
 681           int charpos = p - this->linestart_ + 1;
 682
 683           *pp = p + 2;
 684           if (!this->skip_c_comment(pp))
 685             return Token(Token::TOKEN_INVALID, lineno, charpos);
 686           p = *pp;
 687
 688           continue;
 689         }
 690
 691       // Skip line comments.
 692       if (*p == '#')
 693         {
 694           *pp = p + 1;
 695           if (!this->skip_line_comment(pp))
 696             return this->make_eof_token(p);
 697           p = *pp;
 698           continue;
 699         }
 700
 701       // Check for a name.
 702       if (Lex::can_start_name(p[0], p[1]))
 703         return this->gather_token(Token::TOKEN_STRING,
 704                                   Lex::can_continue_name,
 705                                   p, p + 2, pp);
 706
 707       // We accept any arbitrary name in double quotes, as long as it
 708       // does not cross a line boundary.
 709       if (*p == '"')
 710         {
 711           *pp = p;
 712           return this->gather_quoted_string(pp);
 713         }
 714
 715       // Check for a number.
 716
 717       if (Lex::can_start_hex(p[0], p[1], p[2]))
 718         return this->gather_token(Token::TOKEN_INTEGER,
 719                                   Lex::can_continue_hex,
 720                                   p, p + 3, pp);
 721
 722       if (Lex::can_start_number(p[0]))
 723         return this->gather_token(Token::TOKEN_INTEGER,
 724                                   Lex::can_continue_number,
 725                                   p, p + 1, pp);
 726
 727       // Check for operators.
 728
 729       int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
 730       if (opcode != 0)
 731         {
 732           *pp = p + 3;
 733           return this->make_token(opcode, p);
 734         }
 735
 736       opcode = Lex::two_char_operator(p[0], p[1]);
 737       if (opcode != 0)
 738         {
 739           *pp = p + 2;
 740           return this->make_token(opcode, p);
 741         }
 742
 743       opcode = Lex::one_char_operator(p[0]);
 744       if (opcode != 0)
 745         {
 746           *pp = p + 1;
 747           return this->make_token(opcode, p);
 748         }
 749
 750       return this->make_token(Token::TOKEN_INVALID, p);
 751     }
 752 }
 753
 754 // Tokenize the file.  Return the final token.
 755
 756 Token
 757 Lex::tokenize()
 758 {
 759   std::string contents;
 760   this->read_file(&contents);
 761
 762   const char* p = contents.c_str();
 763
 764   this->lineno_ = 1;
 765   this->linestart_ = p;
 766
 767   while (true)
 768     {
 769       Token t(this->get_token(&p));
 770
 771       // Don't let an early null byte fool us into thinking that we've
 772       // reached the end of the file.
 773       if (t.is_eof()
 774           && static_cast<size_t>(p - contents.c_str()) < contents.length())
 775         t = this->make_invalid_token(p);
 776
 777       if (t.is_invalid() || t.is_eof())
 778         return t;
 779
 780       this->tokens_.push_back(t);
 781     }
 782 }
 783
 784 // A trivial task which waits for THIS_BLOCKER to be clear and then
 785 // clears NEXT_BLOCKER.  THIS_BLOCKER may be NULL.
 786
 787 class Script_unblock : public Task
 788 {
 789  public:
 790   Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
 791     : this_blocker_(this_blocker), next_blocker_(next_blocker)
 792   { }
 793
 794   ~Script_unblock()
 795   {
 796     if (this->this_blocker_ != NULL)
 797       delete this->this_blocker_;
 798   }
 799
 800   Is_runnable_type
 801   is_runnable(Workqueue*)
 802   {
 803     if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
 804       return IS_BLOCKED;
 805     return IS_RUNNABLE;
 806   }
 807
 808   Task_locker*
 809   locks(Workqueue* workqueue)
 810   {
 811     return new Task_locker_block(*this->next_blocker_, workqueue);
 812   }
 813
 814   void
 815   run(Workqueue*)
 816   { }
 817
 818   std::string
 819   get_name() const
 820   { return "Script_unblock"; }
 821
 822  private:
 823   Task_token* this_blocker_;
 824   Task_token* next_blocker_;
 825 };
 826
 827 // This class holds data passed through the parser to the lexer and to
 828 // the parser support functions.  This avoids global variables.  We
 829 // can't use global variables because we need not be called in the
 830 // main thread.
 831
 832 class Parser_closure
 833 {
 834  public:
 835   Parser_closure(const char* filename,
 836                  const Position_dependent_options& posdep_options,
 837                  bool in_group, bool is_in_sysroot,
 838                  Command_line* command_line,
 839                  const Lex::Token_sequence* tokens)
 840     : filename_(filename), posdep_options_(posdep_options),
 841       in_group_(in_group), is_in_sysroot_(is_in_sysroot),
 842       command_line_(command_line), tokens_(tokens),
 843       next_token_index_(0), inputs_(NULL)
 844   { }
 845
 846   // Return the file name.
 847   const char*
 848   filename() const
 849   { return this->filename_; }
 850
 851   // Return the position dependent options.  The caller may modify
 852   // this.
 853   Position_dependent_options&
 854   position_dependent_options()
 855   { return this->posdep_options_; }
 856
 857   // Return whether this script is being run in a group.
 858   bool
 859   in_group() const
 860   { return this->in_group_; }
 861
 862   // Return whether this script was found using a directory in the
 863   // sysroot.
 864   bool
 865   is_in_sysroot() const
 866   { return this->is_in_sysroot_; }
 867
 868   // Returns the Command_line structure passed in at constructor time.
 869   // This value may be NULL.  The caller may modify this, which modifies
 870   // the passed-in Command_line object (not a copy).
 871   Command_line* command_line()
 872   { return this->command_line_; }
 873
 874   // Whether we are at the end of the token list.
 875   bool
 876   at_eof() const
 877   { return this->next_token_index_ >= this->tokens_->size(); }
 878
 879   // Return the next token.
 880   const Token*
 881   next_token()
 882   {
 883     const Token* ret = &(*this->tokens_)[this->next_token_index_];
 884     ++this->next_token_index_;
 885     return ret;
 886   }
 887
 888   // Return the list of input files, creating it if necessary.  This
 889   // is a space leak--we never free the INPUTS_ pointer.
 890   Input_arguments*
 891   inputs()
 892   {
 893     if (this->inputs_ == NULL)
 894       this->inputs_ = new Input_arguments();
 895     return this->inputs_;
 896   }
 897
 898   // Return whether we saw any input files.
 899   bool
 900   saw_inputs() const
 901   { return this->inputs_ != NULL && !this->inputs_->empty(); }
 902
 903  private:
 904   // The name of the file we are reading.
 905   const char* filename_;
 906   // The position dependent options.
 907   Position_dependent_options posdep_options_;
 908   // Whether we are currently in a --start-group/--end-group.
 909   bool in_group_;
 910   // Whether the script was found in a sysrooted directory.
 911   bool is_in_sysroot_;
 912   // May be NULL if the user chooses not to pass one in.
 913   Command_line* command_line_;
 914
 915   // The tokens to be returned by the lexer.
 916   const Lex::Token_sequence* tokens_;
 917   // The index of the next token to return.
 918   unsigned int next_token_index_;
 919   // New input files found to add to the link.
 920   Input_arguments* inputs_;
 921 };
 922
 923 // FILE was found as an argument on the command line.  Try to read it
 924 // as a script.  We've already read BYTES of data into P, but we
 925 // ignore that.  Return true if the file was handled.
 926
 927 bool
 928 read_input_script(Workqueue* workqueue, const General_options& options,
 929                   Symbol_table* symtab, Layout* layout,
 930                   const Dirsearch& dirsearch, Input_objects* input_objects,
 931                   Input_group* input_group,
 932                   const Input_argument* input_argument,
 933                   Input_file* input_file, const unsigned char*, off_t,
 934                   Task_token* this_blocker, Task_token* next_blocker)
 935 {
 936   Lex lex(input_file);
 937   if (lex.tokenize().is_invalid())
 938     return false;
 939
 940   Parser_closure closure(input_file->filename().c_str(),
 941                          input_argument->file().options(),
 942                          input_group != NULL,
 943                          input_file->is_in_sysroot(),
 944                          NULL,
 945                          &lex.tokens());
 946
 947   if (yyparse(&closure) != 0)
 948     return false;
 949
 950   // THIS_BLOCKER must be clear before we may add anything to the
 951   // symbol table.  We are responsible for unblocking NEXT_BLOCKER
 952   // when we are done.  We are responsible for deleting THIS_BLOCKER
 953   // when it is unblocked.
 954
 955   if (!closure.saw_inputs())
 956     {
 957       // The script did not add any files to read.  Note that we are
 958       // not permitted to call NEXT_BLOCKER->unblock() here even if
 959       // THIS_BLOCKER is NULL, as we are not in the main thread.
 960       workqueue->queue(new Script_unblock(this_blocker, next_blocker));
 961       return true;
 962     }
 963
 964   for (Input_arguments::const_iterator p = closure.inputs()->begin();
 965        p != closure.inputs()->end();
 966        ++p)
 967     {
 968       Task_token* nb;
 969       if (p + 1 == closure.inputs()->end())
 970         nb = next_blocker;
 971       else
 972         {
 973           nb = new Task_token();
 974           nb->add_blocker();
 975         }
 976       workqueue->queue(new Read_symbols(options, input_objects, symtab,
 977                                         layout, dirsearch, &*p,
 978                                         input_group, this_blocker, nb));
 979       this_blocker = nb;
 980     }
 981
 982   return true;
 983 }
 984
 985 // FILENAME was found as an argument to --script (-T).
 986 // Read it as a script, and execute its contents immediately.
 987
 988 bool
 989 read_commandline_script(const char* filename, Command_line* cmdline)
 990 {
 991   // TODO: if filename is a relative filename, search for it manually
 992   // using "." + cmdline->options()->search_path() -- not dirsearch.
 993   Dirsearch dirsearch;
 994
 995   Input_file_argument input_argument(filename, false, "",
 996                                      cmdline->position_dependent_options());
 997   Input_file input_file(&input_argument);
 998   if (!input_file.open(cmdline->options(), dirsearch))
 999     return false;
1000
1001   Lex lex(&input_file);
1002   if (lex.tokenize().is_invalid())
1003     {
1004       // Opening the file locked it, so now we need to unlock it.
1005       input_file.file().unlock();
1006       return false;
1007     }
1008
1009   Parser_closure closure(filename,
1010                          cmdline->position_dependent_options(),
1011                          false,
1012                          input_file.is_in_sysroot(),
1013                          cmdline,
1014                          &lex.tokens());
1015   if (yyparse(&closure) != 0)
1016     {
1017       input_file.file().unlock();
1018       return false;
1019     }
1020
1021   input_file.file().unlock();
1022   return true;
1023 }
1024
// Manage mapping from keywords to the codes expected by the bison
// parser.  The class has only static members: a table of keywords
// and a lookup function over that table.

class Keyword_to_parsecode
{
 public:
  // The structure which maps keywords to parsecodes.  The keyword
  // table is built with aggregate initializers of the form
  // { "NAME", CODE }, so the order of these two fields matters.
  struct Keyword_parsecode
  {
    // Keyword.
    const char* keyword;
    // Corresponding parsecode.
    int parsecode;
  };

  // Return the parsecode corresponding to KEYWORD, or 0 if it is not
  // a keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The array of all keywords.
  static const Keyword_parsecode keyword_parsecodes_[];

  // The number of keywords.
  static const int keyword_count;
};
1052
// Mapping from keyword string to keyword parsecode.  Parsecodes are
// looked up using bsearch with a strcmp-based comparison, so this
// array must be kept sorted in strcmp order: upper-case keywords
// come before lower-case ones, and '_' sorts after the upper-case
// letters (hence "ASSERT" before "AS_NEEDED").  This array must
// correspond to the list of parsecodes in yyscript.y.

const Keyword_to_parsecode::Keyword_parsecode
Keyword_to_parsecode::keyword_parsecodes_[] =
{
  { "ABSOLUTE", ABSOLUTE },
  { "ADDR", ADDR },
  { "ALIGN", ALIGN_K },
  { "ASSERT", ASSERT_K },
  { "AS_NEEDED", AS_NEEDED },
  { "AT", AT },
  { "BIND", BIND },
  { "BLOCK", BLOCK },
  { "BYTE", BYTE },
  { "CONSTANT", CONSTANT },
  { "CONSTRUCTORS", CONSTRUCTORS },
  { "COPY", COPY },
  { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
  { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
  { "DATA_SEGMENT_END", DATA_SEGMENT_END },
  { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
  { "DEFINED", DEFINED },
  { "DSECT", DSECT },
  { "ENTRY", ENTRY },
  { "EXCLUDE_FILE", EXCLUDE_FILE },
  { "EXTERN", EXTERN },
  { "FILL", FILL },
  { "FLOAT", FLOAT },
  { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
  { "GROUP", GROUP },
  { "HLL", HLL },
  { "INCLUDE", INCLUDE },
  { "INFO", INFO },
  { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
  { "INPUT", INPUT },
  { "KEEP", KEEP },
  { "LENGTH", LENGTH },
  { "LOADADDR", LOADADDR },
  { "LONG", LONG },
  { "MAP", MAP },
  { "MAX", MAX_K },
  { "MEMORY", MEMORY },
  { "MIN", MIN_K },
  { "NEXT", NEXT },
  { "NOCROSSREFS", NOCROSSREFS },
  { "NOFLOAT", NOFLOAT },
  { "NOLOAD", NOLOAD },
  { "ONLY_IF_RO", ONLY_IF_RO },
  { "ONLY_IF_RW", ONLY_IF_RW },
  { "OPTION", OPTION },
  { "ORIGIN", ORIGIN },
  { "OUTPUT", OUTPUT },
  { "OUTPUT_ARCH", OUTPUT_ARCH },
  { "OUTPUT_FORMAT", OUTPUT_FORMAT },
  { "OVERLAY", OVERLAY },
  { "PHDRS", PHDRS },
  { "PROVIDE", PROVIDE },
  { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
  { "QUAD", QUAD },
  { "SEARCH_DIR", SEARCH_DIR },
  { "SECTIONS", SECTIONS },
  { "SEGMENT_START", SEGMENT_START },
  { "SHORT", SHORT },
  { "SIZEOF", SIZEOF },
  { "SIZEOF_HEADERS", SIZEOF_HEADERS },
  { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
  { "SORT_BY_NAME", SORT_BY_NAME },
  { "SPECIAL", SPECIAL },
  { "SQUAD", SQUAD },
  { "STARTUP", STARTUP },
  { "SUBALIGN", SUBALIGN },
  { "SYSLIB", SYSLIB },
  { "TARGET", TARGET_K },
  { "TRUNCATE", TRUNCATE },
  { "VERSION", VERSIONK },
  // Lower-case keywords.  Some share a parsecode with an upper-case
  // keyword above: l/len -> LENGTH, o/org -> ORIGIN, and
  // sizeof_headers -> SIZEOF_HEADERS.
  { "global", GLOBAL },
  { "l", LENGTH },
  { "len", LENGTH },
  { "local", LOCAL },
  { "o", ORIGIN },
  { "org", ORIGIN },
  { "sizeof_headers", SIZEOF_HEADERS },
};
1138
1139 const int Keyword_to_parsecode::keyword_count =
1140   (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1141    / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1142
1143 // Comparison function passed to bsearch.
1144
1145 extern "C"
1146 {
1147
1148 static int
1149 ktt_compare(const void* keyv, const void* kttv)
1150 {
1151   const char* key = static_cast<const char*>(keyv);
1152   const Keyword_to_parsecode::Keyword_parsecode* ktt =
1153     static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1154   return strcmp(key, ktt->keyword);
1155 }
1156
1157 } // End extern "C".
1158
1159 int
1160 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1161 {
1162   void* kttv = bsearch(keyword,
1163                        Keyword_to_parsecode::keyword_parsecodes_,
1164                        Keyword_to_parsecode::keyword_count,
1165                        sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1166                        ktt_compare);
1167   if (kttv == NULL)
1168     return 0;
1169   Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1170   return ktt->parsecode;
1171 }
1172
1173 } // End namespace gold.
1174
1175 // The remaining functions are extern "C", so it's clearer to not put
1176 // them in namespace gold.
1177
1178 using namespace gold;
1179
1180 // This function is called by the bison parser to return the next
1181 // token.
1182
1183 extern "C" int
1184 yylex(YYSTYPE* lvalp, void* closurev)
1185 {
1186   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1187
1188   if (closure->at_eof())
1189     return 0;
1190
1191   const Token* token = closure->next_token();
1192
1193   switch (token->classification())
1194     {
1195     default:
1196     case Token::TOKEN_INVALID:
1197     case Token::TOKEN_EOF:
1198       gold_unreachable();
1199
1200     case Token::TOKEN_STRING:
1201       {
1202         const char* str = token->string_value().c_str();
1203         int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1204         if (parsecode != 0)
1205           return parsecode;
1206         lvalp->string = str;
1207         return STRING;
1208       }
1209
1210     case Token::TOKEN_OPERATOR:
1211       return token->operator_value();
1212
1213     case Token::TOKEN_INTEGER:
1214       lvalp->integer = token->integer_value();
1215       return INTEGER;
1216     }
1217 }
1218
1219 // This function is called by the bison parser to report an error.
1220
1221 extern "C" void
1222 yyerror(void* closurev, const char* message)
1223 {
1224   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1225
1226   gold_error(_("%s: %s"), closure->filename(), message);
1227 }
1228
1229 // Called by the bison parser to add a file to the link.
1230
1231 extern "C" void
1232 script_add_file(void* closurev, const char* name)
1233 {
1234   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1235
1236   // If this is an absolute path, and we found the script in the
1237   // sysroot, then we want to prepend the sysroot to the file name.
1238   // For example, this is how we handle a cross link to the x86_64
1239   // libc.so, which refers to /lib/libc.so.6.
1240   std::string name_string;
1241   const char* extra_search_path = ".";
1242   std::string script_directory;
1243   if (IS_ABSOLUTE_PATH (name))
1244     {
1245       if (closure->is_in_sysroot())
1246         {
1247           const std::string& sysroot(parameters->sysroot());
1248           gold_assert(!sysroot.empty());
1249           name_string = sysroot + name;
1250           name = name_string.c_str();
1251         }
1252     }
1253   else
1254     {
1255       // In addition to checking the normal library search path, we
1256       // also want to check in the script-directory.
1257       const char *slash = strrchr(closure->filename(), '/');
1258       if (slash != NULL)
1259         {
1260           script_directory.assign(closure->filename(),
1261                                   slash - closure->filename() + 1);
1262           extra_search_path = script_directory.c_str();
1263         }
1264     }
1265
1266   Input_file_argument file(name, false, extra_search_path,
1267                            closure->position_dependent_options());
1268   closure->inputs()->add_file(file);
1269 }
1270
1271 // Called by the bison parser to start a group.  If we are already in
1272 // a group, that means that this script was invoked within a
1273 // --start-group --end-group sequence on the command line, or that
1274 // this script was found in a GROUP of another script.  In that case,
1275 // we simply continue the existing group, rather than starting a new
1276 // one.  It is possible to construct a case in which this will do
1277 // something other than what would happen if we did a recursive group,
1278 // but it's hard to imagine why the different behaviour would be
1279 // useful for a real program.  Avoiding recursive groups is simpler
1280 // and more efficient.
1281
1282 extern "C" void
1283 script_start_group(void* closurev)
1284 {
1285   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1286   if (!closure->in_group())
1287     closure->inputs()->start_group();
1288 }
1289
1290 // Called by the bison parser at the end of a group.
1291
1292 extern "C" void
1293 script_end_group(void* closurev)
1294 {
1295   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1296   if (!closure->in_group())
1297     closure->inputs()->end_group();
1298 }
1299
1300 // Called by the bison parser to start an AS_NEEDED list.
1301
1302 extern "C" void
1303 script_start_as_needed(void* closurev)
1304 {
1305   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1306   closure->position_dependent_options().set_as_needed();
1307 }
1308
1309 // Called by the bison parser at the end of an AS_NEEDED list.
1310
1311 extern "C" void
1312 script_end_as_needed(void* closurev)
1313 {
1314   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1315   closure->position_dependent_options().clear_as_needed();
1316 }
1317
1318 // Called by the bison parser to parse an OPTION.
1319
1320 extern "C" void
1321 script_parse_option(void* closurev, const char* option)
1322 {
1323   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1324   // We treat the option as a single command-line option, even if
1325   // it has internal whitespace.
1326   if (closure->command_line() == NULL)
1327     {
1328       // There are some options that we could handle here--e.g.,
1329       // -lLIBRARY.  Should we bother?
1330       gold_warning(_("%s: Ignoring command OPTION; OPTION is only valid"
1331                      " for scripts specified via -T"),
1332                    closure->filename());
1333     }
1334   else
1335     {
1336       bool past_a_double_dash_option = false;
1337       char* mutable_option = strdup(option);
1338       closure->command_line()->process_one_option(1, &mutable_option, 0,
1339                                                   &past_a_double_dash_option);
1340       free(mutable_option);
1341     }
1342 }