gold/script.cc

   1 // script.cc -- handle linker scripts for gold.
   2
   3 // Copyright 2006, 2007 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #include "gold.h"
  24
  25 #include <string>
  26 #include <vector>
  27 #include <cstdio>
  28 #include <cstdlib>
  29 #include "filenames.h"
  30
  31 #include "options.h"
  32 #include "fileread.h"
  33 #include "workqueue.h"
  34 #include "readsyms.h"
  35 #include "parameters.h"
  36 #include "yyscript.h"
  37 #include "script.h"
  38 #include "script-c.h"
  39
  40 namespace gold
  41 {
  42
  43 // A token read from a script file.  We don't implement keywords here;
  44 // all keywords are simply represented as a string.
  45
  46 class Token
  47 {
  48  public:
  49   // Token classification.
  50   enum Classification
  51   {
  52     // Token is invalid.
  53     TOKEN_INVALID,
  54     // Token indicates end of input.
  55     TOKEN_EOF,
  56     // Token is a string of characters.
  57     TOKEN_STRING,
  58     // Token is an operator.
  59     TOKEN_OPERATOR,
  60     // Token is a number (an integer).
  61     TOKEN_INTEGER
  62   };
  63
  64   // We need an empty constructor so that we can put this STL objects.
  65   Token()
  66     : classification_(TOKEN_INVALID), value_(), opcode_(0),
  67       lineno_(0), charpos_(0)
  68   { }
  69
  70   // A general token with no value.
  71   Token(Classification classification, int lineno, int charpos)
  72     : classification_(classification), value_(), opcode_(0),
  73       lineno_(lineno), charpos_(charpos)
  74   {
  75     gold_assert(classification == TOKEN_INVALID
  76                 || classification == TOKEN_EOF);
  77   }
  78
  79   // A general token with a value.
  80   Token(Classification classification, const std::string& value,
  81         int lineno, int charpos)
  82     : classification_(classification), value_(value), opcode_(0),
  83       lineno_(lineno), charpos_(charpos)
  84   {
  85     gold_assert(classification != TOKEN_INVALID
  86                 && classification != TOKEN_EOF);
  87   }
  88
  89   // A token representing a string of characters.
  90   Token(const std::string& s, int lineno, int charpos)
  91     : classification_(TOKEN_STRING), value_(s), opcode_(0),
  92       lineno_(lineno), charpos_(charpos)
  93   { }
  94
  95   // A token representing an operator.
  96   Token(int opcode, int lineno, int charpos)
  97     : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
  98       lineno_(lineno), charpos_(charpos)
  99   { }
 100
 101   // Return whether the token is invalid.
 102   bool
 103   is_invalid() const
 104   { return this->classification_ == TOKEN_INVALID; }
 105
 106   // Return whether this is an EOF token.
 107   bool
 108   is_eof() const
 109   { return this->classification_ == TOKEN_EOF; }
 110
 111   // Return the token classification.
 112   Classification
 113   classification() const
 114   { return this->classification_; }
 115
 116   // Return the line number at which the token starts.
 117   int
 118   lineno() const
 119   { return this->lineno_; }
 120
 121   // Return the character position at this the token starts.
 122   int
 123   charpos() const
 124   { return this->charpos_; }
 125
 126   // Get the value of a token.
 127
 128   const std::string&
 129   string_value() const
 130   {
 131     gold_assert(this->classification_ == TOKEN_STRING);
 132     return this->value_;
 133   }
 134
 135   int
 136   operator_value() const
 137   {
 138     gold_assert(this->classification_ == TOKEN_OPERATOR);
 139     return this->opcode_;
 140   }
 141
 142   int64_t
 143   integer_value() const
 144   {
 145     gold_assert(this->classification_ == TOKEN_INTEGER);
 146     return strtoll(this->value_.c_str(), NULL, 0);
 147   }
 148
 149  private:
 150   // The token classification.
 151   Classification classification_;
 152   // The token value, for TOKEN_STRING or TOKEN_INTEGER.
 153   std::string value_;
 154   // The token value, for TOKEN_OPERATOR.
 155   int opcode_;
 156   // The line number where this token started (one based).
 157   int lineno_;
 158   // The character position within the line where this token started
 159   // (one based).
 160   int charpos_;
 161 };
 162
 163 // This class handles lexing a file into a sequence of tokens.  We
 164 // don't expect linker scripts to be large, so we just read them and
 165 // tokenize them all at once.
 166
 167 class Lex
 168 {
 169  public:
 170   Lex(Input_file* input_file)
 171     : input_file_(input_file), tokens_()
 172   { }
 173
 174   // Tokenize the file.  Return the final token, which will be either
 175   // an invalid token or an EOF token.  An invalid token indicates
 176   // that tokenization failed.
 177   Token
 178   tokenize();
 179
 180   // A token sequence.
 181   typedef std::vector<Token> Token_sequence;
 182
 183   // Return the tokens.
 184   const Token_sequence&
 185   tokens() const
 186   { return this->tokens_; }
 187
 188  private:
 189   Lex(const Lex&);
 190   Lex& operator=(const Lex&);
 191
 192   // Read the file into a string buffer.
 193   void
 194   read_file(std::string*);
 195
 196   // Make a general token with no value at the current location.
 197   Token
 198   make_token(Token::Classification c, const char* p) const
 199   { return Token(c, this->lineno_, p - this->linestart_ + 1); }
 200
 201   // Make a general token with a value at the current location.
 202   Token
 203   make_token(Token::Classification c, const std::string& v, const char* p)
 204     const
 205   { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
 206
 207   // Make an operator token at the current location.
 208   Token
 209   make_token(int opcode, const char* p) const
 210   { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
 211
 212   // Make an invalid token at the current location.
 213   Token
 214   make_invalid_token(const char* p)
 215   { return this->make_token(Token::TOKEN_INVALID, p); }
 216
 217   // Make an EOF token at the current location.
 218   Token
 219   make_eof_token(const char* p)
 220   { return this->make_token(Token::TOKEN_EOF, p); }
 221
 222   // Return whether C can be the first character in a name.  C2 is the
 223   // next character, since we sometimes need that.
 224   static inline bool
 225   can_start_name(char c, char c2);
 226
 227   // Return whether C can appear in a name which has already started.
 228   static inline bool
 229   can_continue_name(char c);
 230
 231   // Return whether C, C2, C3 can start a hex number.
 232   static inline bool
 233   can_start_hex(char c, char c2, char c3);
 234
 235   // Return whether C can appear in a hex number.
 236   static inline bool
 237   can_continue_hex(char c);
 238
 239   // Return whether C can start a non-hex number.
 240   static inline bool
 241   can_start_number(char c);
 242
 243   // Return whether C can appear in a non-hex number.
 244   static inline bool
 245   can_continue_number(char c)
 246   { return Lex::can_start_number(c); }
 247
 248   // If C1 C2 C3 form a valid three character operator, return the
 249   // opcode.  Otherwise return 0.
 250   static inline int
 251   three_char_operator(char c1, char c2, char c3);
 252
 253   // If C1 C2 form a valid two character operator, return the opcode.
 254   // Otherwise return 0.
 255   static inline int
 256   two_char_operator(char c1, char c2);
 257
 258   // If C1 is a valid one character operator, return the opcode.
 259   // Otherwise return 0.
 260   static inline int
 261   one_char_operator(char c1);
 262
 263   // Read the next token.
 264   Token
 265   get_token(const char**);
 266
 267   // Skip a C style /* */ comment.  Return false if the comment did
 268   // not end.
 269   bool
 270   skip_c_comment(const char**);
 271
 272   // Skip a line # comment.  Return false if there was no newline.
 273   bool
 274   skip_line_comment(const char**);
 275
 276   // Build a token CLASSIFICATION from all characters that match
 277   // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
 278   // MATCH.  Set *PP to the character following the token.
 279   inline Token
 280   gather_token(Token::Classification, bool (*can_continue_fn)(char),
 281                const char* start, const char* match, const char** pp);
 282
 283   // Build a token from a quoted string.
 284   Token
 285   gather_quoted_string(const char** pp);
 286
 287   // The file we are reading.
 288   Input_file* input_file_;
 289   // The token sequence we create.
 290   Token_sequence tokens_;
 291   // The current line number.
 292   int lineno_;
 293   // The start of the current line in the buffer.
 294   const char* linestart_;
 295 };
 296
 297 // Read the whole file into memory.  We don't expect linker scripts to
 298 // be large, so we just use a std::string as a buffer.  We ignore the
 299 // data we've already read, so that we read aligned buffers.
 300
 301 void
 302 Lex::read_file(std::string* contents)
 303 {
 304   off_t filesize = this->input_file_->file().filesize();
 305   contents->clear();
 306   contents->reserve(filesize);
 307
 308   off_t off = 0;
 309   unsigned char buf[BUFSIZ];
 310   while (off < filesize)
 311     {
 312       off_t get = BUFSIZ;
 313       if (get > filesize - off)
 314         get = filesize - off;
 315       this->input_file_->file().read(off, get, buf);
 316       contents->append(reinterpret_cast<char*>(&buf[0]), get);
 317       off += get;
 318     }
 319 }
 320
 321 // Return whether C can be the start of a name, if the next character
 322 // is C2.  A name can being with a letter, underscore, period, or
 323 // dollar sign.  Because a name can be a file name, we also permit
 324 // forward slash, backslash, and tilde.  Tilde is the tricky case
 325 // here; GNU ld also uses it as a bitwise not operator.  It is only
 326 // recognized as the operator if it is not immediately followed by
 327 // some character which can appear in a symbol.  That is, "~0" is a
 328 // symbol name, and "~ 0" is an expression using bitwise not.  We are
 329 // compatible.
 330
 331 inline bool
 332 Lex::can_start_name(char c, char c2)
 333 {
 334   switch (c)
 335     {
 336     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 337     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 338     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 339     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 340     case 'Y': case 'Z':
 341     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 342     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 343     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 344     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 345     case 'y': case 'z':
 346     case '_': case '.': case '$': case '/': case '\\':
 347       return true;
 348
 349     case '~':
 350       return can_continue_name(c2);
 351
 352     default:
 353       return false;
 354     }
 355 }
 356
 357 // Return whether C can continue a name which has already started.
 358 // Subsequent characters in a name are the same as the leading
 359 // characters, plus digits and "=+-:[],?*".  So in general the linker
 360 // script language requires spaces around operators.
 361
 362 inline bool
 363 Lex::can_continue_name(char c)
 364 {
 365   switch (c)
 366     {
 367     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 368     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 369     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 370     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 371     case 'Y': case 'Z':
 372     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 373     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 374     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 375     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 376     case 'y': case 'z':
 377     case '_': case '.': case '$': case '/': case '\\':
 378     case '~':
 379     case '0': case '1': case '2': case '3': case '4':
 380     case '5': case '6': case '7': case '8': case '9':
 381     case '=': case '+': case '-': case ':': case '[': case ']':
 382     case ',': case '?': case '*':
 383       return true;
 384
 385     default:
 386       return false;
 387     }
 388 }
 389
 390 // For a number we accept 0x followed by hex digits, or any sequence
 391 // of digits.  The old linker accepts leading '$' for hex, and
 392 // trailing HXBOD.  Those are for MRI compatibility and we don't
 393 // accept them.  The old linker also accepts trailing MK for mega or
 394 // kilo.  Those are mentioned in the documentation, and we accept
 395 // them.
 396
 397 // Return whether C1 C2 C3 can start a hex number.
 398
 399 inline bool
 400 Lex::can_start_hex(char c1, char c2, char c3)
 401 {
 402   if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
 403     return Lex::can_continue_hex(c3);
 404   return false;
 405 }
 406
 407 // Return whether C can appear in a hex number.
 408
 409 inline bool
 410 Lex::can_continue_hex(char c)
 411 {
 412   switch (c)
 413     {
 414     case '0': case '1': case '2': case '3': case '4':
 415     case '5': case '6': case '7': case '8': case '9':
 416     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 417     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 418       return true;
 419
 420     default:
 421       return false;
 422     }
 423 }
 424
 425 // Return whether C can start a non-hex number.
 426
 427 inline bool
 428 Lex::can_start_number(char c)
 429 {
 430   switch (c)
 431     {
 432     case '0': case '1': case '2': case '3': case '4':
 433     case '5': case '6': case '7': case '8': case '9':
 434       return true;
 435
 436     default:
 437       return false;
 438     }
 439 }
 440
 441 // If C1 C2 C3 form a valid three character operator, return the
 442 // opcode (defined in the yyscript.h file generated from yyscript.y).
 443 // Otherwise return 0.
 444
 445 inline int
 446 Lex::three_char_operator(char c1, char c2, char c3)
 447 {
 448   switch (c1)
 449     {
 450     case '<':
 451       if (c2 == '<' && c3 == '=')
 452         return LSHIFTEQ;
 453       break;
 454     case '>':
 455       if (c2 == '>' && c3 == '=')
 456         return RSHIFTEQ;
 457       break;
 458     default:
 459       break;
 460     }
 461   return 0;
 462 }
 463
 464 // If C1 C2 form a valid two character operator, return the opcode
 465 // (defined in the yyscript.h file generated from yyscript.y).
 466 // Otherwise return 0.
 467
 468 inline int
 469 Lex::two_char_operator(char c1, char c2)
 470 {
 471   switch (c1)
 472     {
 473     case '=':
 474       if (c2 == '=')
 475         return EQ;
 476       break;
 477     case '!':
 478       if (c2 == '=')
 479         return NE;
 480       break;
 481     case '+':
 482       if (c2 == '=')
 483         return PLUSEQ;
 484       break;
 485     case '-':
 486       if (c2 == '=')
 487         return MINUSEQ;
 488       break;
 489     case '*':
 490       if (c2 == '=')
 491         return MULTEQ;
 492       break;
 493     case '/':
 494       if (c2 == '=')
 495         return DIVEQ;
 496       break;
 497     case '|':
 498       if (c2 == '=')
 499         return OREQ;
 500       if (c2 == '|')
 501         return OROR;
 502       break;
 503     case '&':
 504       if (c2 == '=')
 505         return ANDEQ;
 506       if (c2 == '&')
 507         return ANDAND;
 508       break;
 509     case '>':
 510       if (c2 == '=')
 511         return GE;
 512       if (c2 == '>')
 513         return RSHIFT;
 514       break;
 515     case '<':
 516       if (c2 == '=')
 517         return LE;
 518       if (c2 == '<')
 519         return LSHIFT;
 520       break;
 521     default:
 522       break;
 523     }
 524   return 0;
 525 }
 526
 527 // If C1 is a valid operator, return the opcode.  Otherwise return 0.
 528
 529 inline int
 530 Lex::one_char_operator(char c1)
 531 {
 532   switch (c1)
 533     {
 534     case '+':
 535     case '-':
 536     case '*':
 537     case '/':
 538     case '%':
 539     case '!':
 540     case '&':
 541     case '|':
 542     case '^':
 543     case '~':
 544     case '<':
 545     case '>':
 546     case '=':
 547     case '?':
 548     case ',':
 549     case '(':
 550     case ')':
 551     case '{':
 552     case '}':
 553     case '[':
 554     case ']':
 555     case ':':
 556     case ';':
 557       return c1;
 558     default:
 559       return 0;
 560     }
 561 }
 562
 563 // Skip a C style comment.  *PP points to just after the "/*".  Return
 564 // false if the comment did not end.
 565
 566 bool
 567 Lex::skip_c_comment(const char** pp)
 568 {
 569   const char* p = *pp;
 570   while (p[0] != '*' || p[1] != '/')
 571     {
 572       if (*p == '\0')
 573         {
 574           *pp = p;
 575           return false;
 576         }
 577
 578       if (*p == '\n')
 579         {
 580           ++this->lineno_;
 581           this->linestart_ = p + 1;
 582         }
 583       ++p;
 584     }
 585
 586   *pp = p + 2;
 587   return true;
 588 }
 589
 590 // Skip a line # comment.  Return false if there was no newline.
 591
 592 bool
 593 Lex::skip_line_comment(const char** pp)
 594 {
 595   const char* p = *pp;
 596   size_t skip = strcspn(p, "\n");
 597   if (p[skip] == '\0')
 598     {
 599       *pp = p + skip;
 600       return false;
 601     }
 602
 603   p += skip + 1;
 604   ++this->lineno_;
 605   this->linestart_ = p;
 606   *pp = p;
 607
 608   return true;
 609 }
 610
 611 // Build a token CLASSIFICATION from all characters that match
 612 // CAN_CONTINUE_FN.  Update *PP.
 613
 614 inline Token
 615 Lex::gather_token(Token::Classification classification,
 616                   bool (*can_continue_fn)(char),
 617                   const char* start,
 618                   const char* match,
 619                   const char **pp)
 620 {
 621   while ((*can_continue_fn)(*match))
 622     ++match;
 623   *pp = match;
 624   return this->make_token(classification,
 625                           std::string(start, match - start),
 626                           start);
 627 }
 628
 629 // Build a token from a quoted string.
 630
 631 Token
 632 Lex::gather_quoted_string(const char** pp)
 633 {
 634   const char* start = *pp;
 635   const char* p = start;
 636   ++p;
 637   size_t skip = strcspn(p, "\"\n");
 638   if (p[skip] != '"')
 639     return this->make_invalid_token(start);
 640   *pp = p + skip + 1;
 641   return this->make_token(Token::TOKEN_STRING,
 642                           std::string(p, skip),
 643                           start);
 644 }
 645
 646 // Return the next token at *PP.  Update *PP.  General guideline: we
 647 // require linker scripts to be simple ASCII.  No unicode linker
 648 // scripts.  In particular we can assume that any '\0' is the end of
 649 // the input.
 650
 651 Token
 652 Lex::get_token(const char** pp)
 653 {
 654   const char* p = *pp;
 655
 656   while (true)
 657     {
 658       if (*p == '\0')
 659         {
 660           *pp = p;
 661           return this->make_eof_token(p);
 662         }
 663
 664       // Skip whitespace quickly.
 665       while (*p == ' ' || *p == '\t')
 666         ++p;
 667
 668       if (*p == '\n')
 669         {
 670           ++p;
 671           ++this->lineno_;
 672           this->linestart_ = p;
 673           continue;
 674         }
 675
 676       // Skip C style comments.
 677       if (p[0] == '/' && p[1] == '*')
 678         {
 679           int lineno = this->lineno_;
 680           int charpos = p - this->linestart_ + 1;
 681
 682           *pp = p + 2;
 683           if (!this->skip_c_comment(pp))
 684             return Token(Token::TOKEN_INVALID, lineno, charpos);
 685           p = *pp;
 686
 687           continue;
 688         }
 689
 690       // Skip line comments.
 691       if (*p == '#')
 692         {
 693           *pp = p + 1;
 694           if (!this->skip_line_comment(pp))
 695             return this->make_eof_token(p);
 696           p = *pp;
 697           continue;
 698         }
 699
 700       // Check for a name.
 701       if (Lex::can_start_name(p[0], p[1]))
 702         return this->gather_token(Token::TOKEN_STRING,
 703                                   Lex::can_continue_name,
 704                                   p, p + 2, pp);
 705
 706       // We accept any arbitrary name in double quotes, as long as it
 707       // does not cross a line boundary.
 708       if (*p == '"')
 709         {
 710           *pp = p;
 711           return this->gather_quoted_string(pp);
 712         }
 713
 714       // Check for a number.
 715
 716       if (Lex::can_start_hex(p[0], p[1], p[2]))
 717         return this->gather_token(Token::TOKEN_INTEGER,
 718                                   Lex::can_continue_hex,
 719                                   p, p + 3, pp);
 720
 721       if (Lex::can_start_number(p[0]))
 722         return this->gather_token(Token::TOKEN_INTEGER,
 723                                   Lex::can_continue_number,
 724                                   p, p + 1, pp);
 725
 726       // Check for operators.
 727
 728       int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
 729       if (opcode != 0)
 730         {
 731           *pp = p + 3;
 732           return this->make_token(opcode, p);
 733         }
 734
 735       opcode = Lex::two_char_operator(p[0], p[1]);
 736       if (opcode != 0)
 737         {
 738           *pp = p + 2;
 739           return this->make_token(opcode, p);
 740         }
 741
 742       opcode = Lex::one_char_operator(p[0]);
 743       if (opcode != 0)
 744         {
 745           *pp = p + 1;
 746           return this->make_token(opcode, p);
 747         }
 748
 749       return this->make_token(Token::TOKEN_INVALID, p);
 750     }
 751 }
 752
 753 // Tokenize the file.  Return the final token.
 754
 755 Token
 756 Lex::tokenize()
 757 {
 758   std::string contents;
 759   this->read_file(&contents);
 760
 761   const char* p = contents.c_str();
 762
 763   this->lineno_ = 1;
 764   this->linestart_ = p;
 765
 766   while (true)
 767     {
 768       Token t(this->get_token(&p));
 769
 770       // Don't let an early null byte fool us into thinking that we've
 771       // reached the end of the file.
 772       if (t.is_eof()
 773           && static_cast<size_t>(p - contents.c_str()) < contents.length())
 774         t = this->make_invalid_token(p);
 775
 776       if (t.is_invalid() || t.is_eof())
 777         return t;
 778
 779       this->tokens_.push_back(t);
 780     }
 781 }
 782
 783 // A trivial task which waits for THIS_BLOCKER to be clear and then
 784 // clears NEXT_BLOCKER.  THIS_BLOCKER may be NULL.
 785
 786 class Script_unblock : public Task
 787 {
 788  public:
 789   Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
 790     : this_blocker_(this_blocker), next_blocker_(next_blocker)
 791   { }
 792
 793   ~Script_unblock()
 794   {
 795     if (this->this_blocker_ != NULL)
 796       delete this->this_blocker_;
 797   }
 798
 799   Is_runnable_type
 800   is_runnable(Workqueue*)
 801   {
 802     if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
 803       return IS_BLOCKED;
 804     return IS_RUNNABLE;
 805   }
 806
 807   Task_locker*
 808   locks(Workqueue* workqueue)
 809   {
 810     return new Task_locker_block(*this->next_blocker_, workqueue);
 811   }
 812
 813   void
 814   run(Workqueue*)
 815   { }
 816
 817  private:
 818   Task_token* this_blocker_;
 819   Task_token* next_blocker_;
 820 };
 821
 822 // This class holds data passed through the parser to the lexer and to
 823 // the parser support functions.  This avoids global variables.  We
 824 // can't use global variables because we need not be called in the
 825 // main thread.
 826
 827 class Parser_closure
 828 {
 829  public:
 830   Parser_closure(const char* filename,
 831                  const Position_dependent_options& posdep_options,
 832                  bool in_group, bool is_in_sysroot,
 833                  const Lex::Token_sequence* tokens)
 834     : filename_(filename), posdep_options_(posdep_options),
 835       in_group_(in_group), is_in_sysroot_(is_in_sysroot), tokens_(tokens),
 836       next_token_index_(0), inputs_(NULL)
 837   { }
 838
 839   // Return the file name.
 840   const char*
 841   filename() const
 842   { return this->filename_; }
 843
 844   // Return the position dependent options.  The caller may modify
 845   // this.
 846   Position_dependent_options&
 847   position_dependent_options()
 848   { return this->posdep_options_; }
 849
 850   // Return whether this script is being run in a group.
 851   bool
 852   in_group() const
 853   { return this->in_group_; }
 854
 855   // Return whether this script was found using a directory in the
 856   // sysroot.
 857   bool
 858   is_in_sysroot() const
 859   { return this->is_in_sysroot_; }
 860
 861   // Whether we are at the end of the token list.
 862   bool
 863   at_eof() const
 864   { return this->next_token_index_ >= this->tokens_->size(); }
 865
 866   // Return the next token.
 867   const Token*
 868   next_token()
 869   {
 870     const Token* ret = &(*this->tokens_)[this->next_token_index_];
 871     ++this->next_token_index_;
 872     return ret;
 873   }
 874
 875   // Return the list of input files, creating it if necessary.  This
 876   // is a space leak--we never free the INPUTS_ pointer.
 877   Input_arguments*
 878   inputs()
 879   {
 880     if (this->inputs_ == NULL)
 881       this->inputs_ = new Input_arguments();
 882     return this->inputs_;
 883   }
 884
 885   // Return whether we saw any input files.
 886   bool
 887   saw_inputs() const
 888   { return this->inputs_ != NULL && !this->inputs_->empty(); }
 889
 890  private:
 891   // The name of the file we are reading.
 892   const char* filename_;
 893   // The position dependent options.
 894   Position_dependent_options posdep_options_;
 895   // Whether we are currently in a --start-group/--end-group.
 896   bool in_group_;
 897   // Whether the script was found in a sysrooted directory.
 898   bool is_in_sysroot_;
 899
 900   // The tokens to be returned by the lexer.
 901   const Lex::Token_sequence* tokens_;
 902   // The index of the next token to return.
 903   unsigned int next_token_index_;
 904   // New input files found to add to the link.
 905   Input_arguments* inputs_;
 906 };
 907
 908 // FILE was found as an argument on the command line.  Try to read it
 909 // as a script.  We've already read BYTES of data into P, but we
 910 // ignore that.  Return true if the file was handled.
 911
 912 bool
 913 read_input_script(Workqueue* workqueue, const General_options& options,
 914                   Symbol_table* symtab, Layout* layout,
 915                   const Dirsearch& dirsearch, Input_objects* input_objects,
 916                   Input_group* input_group,
 917                   const Input_argument* input_argument,
 918                   Input_file* input_file, const unsigned char*, off_t,
 919                   Task_token* this_blocker, Task_token* next_blocker)
 920 {
 921   Lex lex(input_file);
 922   if (lex.tokenize().is_invalid())
 923     return false;
 924
 925   Parser_closure closure(input_file->filename().c_str(),
 926                          input_argument->file().options(),
 927                          input_group != NULL,
 928                          input_file->is_in_sysroot(),
 929                          &lex.tokens());
 930
 931   if (yyparse(&closure) != 0)
 932     return false;
 933
 934   // THIS_BLOCKER must be clear before we may add anything to the
 935   // symbol table.  We are responsible for unblocking NEXT_BLOCKER
 936   // when we are done.  We are responsible for deleting THIS_BLOCKER
 937   // when it is unblocked.
 938
 939   if (!closure.saw_inputs())
 940     {
 941       // The script did not add any files to read.  Note that we are
 942       // not permitted to call NEXT_BLOCKER->unblock() here even if
 943       // THIS_BLOCKER is NULL, as we are not in the main thread.
 944       workqueue->queue(new Script_unblock(this_blocker, next_blocker));
 945       return true;
 946     }
 947
 948   for (Input_arguments::const_iterator p = closure.inputs()->begin();
 949        p != closure.inputs()->end();
 950        ++p)
 951     {
 952       Task_token* nb;
 953       if (p + 1 == closure.inputs()->end())
 954         nb = next_blocker;
 955       else
 956         {
 957           nb = new Task_token();
 958           nb->add_blocker();
 959         }
 960       workqueue->queue(new Read_symbols(options, input_objects, symtab,
 961                                         layout, dirsearch, &*p,
 962                                         input_group, this_blocker, nb));
 963       this_blocker = nb;
 964     }
 965
 966   return true;
 967 }
 968
 969 // Manage mapping from keywords to the codes expected by the bison
 970 // parser.
 971
 972 class Keyword_to_parsecode
 973 {
 974  public:
 975   // The structure which maps keywords to parsecodes.
 976   struct Keyword_parsecode
 977   {
 978     // Keyword.
 979     const char* keyword;
 980     // Corresponding parsecode.
 981     int parsecode;
 982   };
 983
 984   // Return the parsecode corresponding KEYWORD, or 0 if it is not a
 985   // keyword.
 986   static int
 987   keyword_to_parsecode(const char* keyword);
 988
 989  private:
 990   // The array of all keywords.
 991   static const Keyword_parsecode keyword_parsecodes_[];
 992
 993   // The number of keywords.
 994   static const int keyword_count;
 995 };
 996
// Mapping from keyword string to keyword parsecode.  This array must
// be kept in sorted order.  Parsecodes are looked up using bsearch.
// This array must correspond to the list of parsecodes in yyscript.y.
//
// "Sorted" means the order produced by strcmp, because that is what
// the bsearch comparison function (ktt_compare) uses.  In ASCII this
// puts every upper case keyword before every lower case one, and
// puts 'S' before '_', which is why "ASSERT" precedes "AS_NEEDED".
// A new entry inserted out of order would make some lookups fail.

const Keyword_to_parsecode::Keyword_parsecode
Keyword_to_parsecode::keyword_parsecodes_[] =
{
  { "ABSOLUTE", ABSOLUTE },
  { "ADDR", ADDR },
  { "ALIGN", ALIGN_K },
  { "ASSERT", ASSERT_K },
  { "AS_NEEDED", AS_NEEDED },
  { "AT", AT },
  { "BIND", BIND },
  { "BLOCK", BLOCK },
  { "BYTE", BYTE },
  { "CONSTANT", CONSTANT },
  { "CONSTRUCTORS", CONSTRUCTORS },
  { "COPY", COPY },
  { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
  { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
  { "DATA_SEGMENT_END", DATA_SEGMENT_END },
  { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
  { "DEFINED", DEFINED },
  { "DSECT", DSECT },
  { "ENTRY", ENTRY },
  { "EXCLUDE_FILE", EXCLUDE_FILE },
  { "EXTERN", EXTERN },
  { "FILL", FILL },
  { "FLOAT", FLOAT },
  { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
  { "GROUP", GROUP },
  { "HLL", HLL },
  { "INCLUDE", INCLUDE },
  { "INFO", INFO },
  { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
  { "INPUT", INPUT },
  { "KEEP", KEEP },
  { "LENGTH", LENGTH },
  { "LOADADDR", LOADADDR },
  { "LONG", LONG },
  { "MAP", MAP },
  { "MAX", MAX_K },
  { "MEMORY", MEMORY },
  { "MIN", MIN_K },
  { "NEXT", NEXT },
  { "NOCROSSREFS", NOCROSSREFS },
  { "NOFLOAT", NOFLOAT },
  { "NOLOAD", NOLOAD },
  { "ONLY_IF_RO", ONLY_IF_RO },
  { "ONLY_IF_RW", ONLY_IF_RW },
  { "OPTION", OPTION },
  { "ORIGIN", ORIGIN },
  { "OUTPUT", OUTPUT },
  { "OUTPUT_ARCH", OUTPUT_ARCH },
  { "OUTPUT_FORMAT", OUTPUT_FORMAT },
  { "OVERLAY", OVERLAY },
  { "PHDRS", PHDRS },
  { "PROVIDE", PROVIDE },
  { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
  { "QUAD", QUAD },
  { "SEARCH_DIR", SEARCH_DIR },
  { "SECTIONS", SECTIONS },
  { "SEGMENT_START", SEGMENT_START },
  { "SHORT", SHORT },
  { "SIZEOF", SIZEOF },
  { "SIZEOF_HEADERS", SIZEOF_HEADERS },
  { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
  { "SORT_BY_NAME", SORT_BY_NAME },
  { "SPECIAL", SPECIAL },
  { "SQUAD", SQUAD },
  { "STARTUP", STARTUP },
  { "SUBALIGN", SUBALIGN },
  { "SYSLIB", SYSLIB },
  { "TARGET", TARGET_K },
  { "TRUNCATE", TRUNCATE },
  { "VERSION", VERSIONK },
  // Lower case keywords.  "l" and "len" share the parsecode of
  // "LENGTH", "o" and "org" share the parsecode of "ORIGIN", and
  // "sizeof_headers" shares the parsecode of "SIZEOF_HEADERS".
  { "global", GLOBAL },
  { "l", LENGTH },
  { "len", LENGTH },
  { "local", LOCAL },
  { "o", ORIGIN },
  { "org", ORIGIN },
  { "sizeof_headers", SIZEOF_HEADERS },
};
1082
1083 const int Keyword_to_parsecode::keyword_count =
1084   (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1085    / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1086
1087 // Comparison function passed to bsearch.
1088
1089 extern "C"
1090 {
1091
1092 static int
1093 ktt_compare(const void* keyv, const void* kttv)
1094 {
1095   const char* key = static_cast<const char*>(keyv);
1096   const Keyword_to_parsecode::Keyword_parsecode* ktt =
1097     static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1098   return strcmp(key, ktt->keyword);
1099 }
1100
1101 } // End extern "C".
1102
1103 int
1104 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1105 {
1106   void* kttv = bsearch(keyword,
1107                        Keyword_to_parsecode::keyword_parsecodes_,
1108                        Keyword_to_parsecode::keyword_count,
1109                        sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1110                        ktt_compare);
1111   if (kttv == NULL)
1112     return 0;
1113   Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1114   return ktt->parsecode;
1115 }
1116
1117 } // End namespace gold.
1118
1119 // The remaining functions are extern "C", so it's clearer to not put
1120 // them in namespace gold.
1121
1122 using namespace gold;
1123
1124 // This function is called by the bison parser to return the next
1125 // token.
1126
1127 extern "C" int
1128 yylex(YYSTYPE* lvalp, void* closurev)
1129 {
1130   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1131
1132   if (closure->at_eof())
1133     return 0;
1134
1135   const Token* token = closure->next_token();
1136
1137   switch (token->classification())
1138     {
1139     default:
1140     case Token::TOKEN_INVALID:
1141     case Token::TOKEN_EOF:
1142       gold_unreachable();
1143
1144     case Token::TOKEN_STRING:
1145       {
1146         const char* str = token->string_value().c_str();
1147         int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1148         if (parsecode != 0)
1149           return parsecode;
1150         lvalp->string = str;
1151         return STRING;
1152       }
1153
1154     case Token::TOKEN_OPERATOR:
1155       return token->operator_value();
1156
1157     case Token::TOKEN_INTEGER:
1158       lvalp->integer = token->integer_value();
1159       return INTEGER;
1160     }
1161 }
1162
1163 // This function is called by the bison parser to report an error.
1164
1165 extern "C" void
1166 yyerror(void* closurev, const char* message)
1167 {
1168   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1169
1170   gold_error(_("%s: %s"), closure->filename(), message);
1171 }
1172
1173 // Called by the bison parser to add a file to the link.
1174
1175 extern "C" void
1176 script_add_file(void* closurev, const char* name)
1177 {
1178   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1179
1180   // If this is an absolute path, and we found the script in the
1181   // sysroot, then we want to prepend the sysroot to the file name.
1182   // For example, this is how we handle a cross link to the x86_64
1183   // libc.so, which refers to /lib/libc.so.6.
1184   std::string name_string;
1185   const char* extra_search_path = ".";
1186   std::string script_directory;
1187   if (IS_ABSOLUTE_PATH (name))
1188     {
1189       if (closure->is_in_sysroot())
1190         {
1191           const std::string& sysroot(parameters->sysroot());
1192           gold_assert(!sysroot.empty());
1193           name_string = sysroot + name;
1194           name = name_string.c_str();
1195         }
1196     }
1197   else
1198     {
1199       // In addition to checking the normal library search path, we
1200       // also want to check in the script-directory.
1201       const char *slash = strrchr(closure->filename(), '/');
1202       if (slash != NULL)
1203         {
1204           script_directory.assign(closure->filename(),
1205                                   slash - closure->filename() + 1);
1206           extra_search_path = script_directory.c_str();
1207         }
1208     }
1209
1210   Input_file_argument file(name, false, extra_search_path,
1211                            closure->position_dependent_options());
1212   closure->inputs()->add_file(file);
1213 }
1214
1215 // Called by the bison parser to start a group.  If we are already in
1216 // a group, that means that this script was invoked within a
1217 // --start-group --end-group sequence on the command line, or that
1218 // this script was found in a GROUP of another script.  In that case,
1219 // we simply continue the existing group, rather than starting a new
1220 // one.  It is possible to construct a case in which this will do
1221 // something other than what would happen if we did a recursive group,
1222 // but it's hard to imagine why the different behaviour would be
1223 // useful for a real program.  Avoiding recursive groups is simpler
1224 // and more efficient.
1225
1226 extern "C" void
1227 script_start_group(void* closurev)
1228 {
1229   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1230   if (!closure->in_group())
1231     closure->inputs()->start_group();
1232 }
1233
1234 // Called by the bison parser at the end of a group.
1235
1236 extern "C" void
1237 script_end_group(void* closurev)
1238 {
1239   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1240   if (!closure->in_group())
1241     closure->inputs()->end_group();
1242 }
1243
1244 // Called by the bison parser to start an AS_NEEDED list.
1245
1246 extern "C" void
1247 script_start_as_needed(void* closurev)
1248 {
1249   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1250   closure->position_dependent_options().set_as_needed();
1251 }
1252
1253 // Called by the bison parser at the end of an AS_NEEDED list.
1254
1255 extern "C" void
1256 script_end_as_needed(void* closurev)
1257 {
1258   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1259   closure->position_dependent_options().clear_as_needed();
1260 }
1261
1262 // Called by the bison parser to parse an OPTION.
1263
1264 extern "C" void
1265 script_parse_option(void* closurev, const char* option)
1266 {
1267   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1268   printf("%s: Saw option %s\n", closure->filename(), option);  //!!
1269 }