gold/dwarf_reader.cc

   1 // dwarf_reader.cc -- parse dwarf2/3 debug information
   2
   3 // Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #include "gold.h"
  24
  25 #include <algorithm>
  26 #include <vector>
  27
  28 #include "elfcpp_swap.h"
  29 #include "dwarf.h"
  30 #include "object.h"
  31 #include "parameters.h"
  32 #include "reloc.h"
  33 #include "dwarf_reader.h"
  34 #include "int_encoding.h"
  35 #include "compressed_output.h"
  36
  37 namespace gold {
  38
  39 struct LineStateMachine
  40 {
  41   int file_num;
  42   uint64_t address;
  43   int line_num;
  44   int column_num;
  45   unsigned int shndx;    // the section address refers to
  46   bool is_stmt;          // stmt means statement.
  47   bool basic_block;
  48   bool end_sequence;
  49 };
  50
  51 static void
  52 ResetLineStateMachine(struct LineStateMachine* lsm, bool default_is_stmt)
  53 {
  54   lsm->file_num = 1;
  55   lsm->address = 0;
  56   lsm->line_num = 1;
  57   lsm->column_num = 0;
  58   lsm->shndx = -1U;
  59   lsm->is_stmt = default_is_stmt;
  60   lsm->basic_block = false;
  61   lsm->end_sequence = false;
  62 }
  63
  64 template<int size, bool big_endian>
  65 Sized_dwarf_line_info<size, big_endian>::Sized_dwarf_line_info(
  66     Object* object,
  67     unsigned int read_shndx)
  68   : data_valid_(false), buffer_(NULL), buffer_start_(NULL),
  69     symtab_buffer_(NULL), directories_(), files_(), current_header_index_(-1)
  70 {
  71   unsigned int debug_shndx;
  72
  73   for (debug_shndx = 1; debug_shndx < object->shnum(); ++debug_shndx)
  74     {
  75       // FIXME: do this more efficiently: section_name() isn't super-fast
  76       std::string name = object->section_name(debug_shndx);
  77       if (name == ".debug_line" || name == ".zdebug_line")
  78         {
  79           section_size_type buffer_size;
  80           bool is_new = false;
  81           this->buffer_ = object->decompressed_section_contents(debug_shndx,
  82                                                                 &buffer_size,
  83                                                                 &is_new);
  84           if (is_new)
  85             this->buffer_start_ = this->buffer_;
  86           this->buffer_end_ = this->buffer_ + buffer_size;
  87           break;
  88         }
  89     }
  90   if (this->buffer_ == NULL)
  91     return;
  92
  93   // Find the relocation section for ".debug_line".
  94   // We expect these for relobjs (.o's) but not dynobjs (.so's).
  95   bool got_relocs = false;
  96   for (unsigned int reloc_shndx = 0;
  97        reloc_shndx < object->shnum();
  98        ++reloc_shndx)
  99     {
 100       unsigned int reloc_sh_type = object->section_type(reloc_shndx);
 101       if ((reloc_sh_type == elfcpp::SHT_REL
 102            || reloc_sh_type == elfcpp::SHT_RELA)
 103           && object->section_info(reloc_shndx) == debug_shndx)
 104         {
 105           got_relocs = this->track_relocs_.initialize(object, reloc_shndx,
 106                                                       reloc_sh_type);
 107           this->track_relocs_type_ = reloc_sh_type;
 108           break;
 109         }
 110     }
 111
 112   // Finally, we need the symtab section to interpret the relocs.
 113   if (got_relocs)
 114     {
 115       unsigned int symtab_shndx;
 116       for (symtab_shndx = 0; symtab_shndx < object->shnum(); ++symtab_shndx)
 117         if (object->section_type(symtab_shndx) == elfcpp::SHT_SYMTAB)
 118           {
 119             this->symtab_buffer_ = object->section_contents(
 120                 symtab_shndx, &this->symtab_buffer_size_, false);
 121             break;
 122           }
 123       if (this->symtab_buffer_ == NULL)
 124         return;
 125     }
 126
 127   // Now that we have successfully read all the data, parse the debug
 128   // info.
 129   this->data_valid_ = true;
 130   this->read_line_mappings(object, read_shndx);
 131 }
 132
 133 // Read the DWARF header.
 134
 135 template<int size, bool big_endian>
 136 const unsigned char*
 137 Sized_dwarf_line_info<size, big_endian>::read_header_prolog(
 138     const unsigned char* lineptr)
 139 {
 140   uint32_t initial_length = elfcpp::Swap_unaligned<32, big_endian>::readval(lineptr);
 141   lineptr += 4;
 142
 143   // In DWARF2/3, if the initial length is all 1 bits, then the offset
 144   // size is 8 and we need to read the next 8 bytes for the real length.
 145   if (initial_length == 0xffffffff)
 146     {
 147       header_.offset_size = 8;
 148       initial_length = elfcpp::Swap_unaligned<64, big_endian>::readval(lineptr);
 149       lineptr += 8;
 150     }
 151   else
 152     header_.offset_size = 4;
 153
 154   header_.total_length = initial_length;
 155
 156   gold_assert(lineptr + header_.total_length <= buffer_end_);
 157
 158   header_.version = elfcpp::Swap_unaligned<16, big_endian>::readval(lineptr);
 159   lineptr += 2;
 160
 161   if (header_.offset_size == 4)
 162     header_.prologue_length = elfcpp::Swap_unaligned<32, big_endian>::readval(lineptr);
 163   else
 164     header_.prologue_length = elfcpp::Swap_unaligned<64, big_endian>::readval(lineptr);
 165   lineptr += header_.offset_size;
 166
 167   header_.min_insn_length = *lineptr;
 168   lineptr += 1;
 169
 170   header_.default_is_stmt = *lineptr;
 171   lineptr += 1;
 172
 173   header_.line_base = *reinterpret_cast<const signed char*>(lineptr);
 174   lineptr += 1;
 175
 176   header_.line_range = *lineptr;
 177   lineptr += 1;
 178
 179   header_.opcode_base = *lineptr;
 180   lineptr += 1;
 181
 182   header_.std_opcode_lengths.resize(header_.opcode_base + 1);
 183   header_.std_opcode_lengths[0] = 0;
 184   for (int i = 1; i < header_.opcode_base; i++)
 185     {
 186       header_.std_opcode_lengths[i] = *lineptr;
 187       lineptr += 1;
 188     }
 189
 190   return lineptr;
 191 }
 192
 193 // The header for a debug_line section is mildly complicated, because
 194 // the line info is very tightly encoded.
 195
 196 template<int size, bool big_endian>
 197 const unsigned char*
 198 Sized_dwarf_line_info<size, big_endian>::read_header_tables(
 199     const unsigned char* lineptr)
 200 {
 201   ++this->current_header_index_;
 202
 203   // Create a new directories_ entry and a new files_ entry for our new
 204   // header.  We initialize each with a single empty element, because
 205   // dwarf indexes directory and filenames starting at 1.
 206   gold_assert(static_cast<int>(this->directories_.size())
 207               == this->current_header_index_);
 208   gold_assert(static_cast<int>(this->files_.size())
 209               == this->current_header_index_);
 210   this->directories_.push_back(std::vector<std::string>(1));
 211   this->files_.push_back(std::vector<std::pair<int, std::string> >(1));
 212
 213   // It is legal for the directory entry table to be empty.
 214   if (*lineptr)
 215     {
 216       int dirindex = 1;
 217       while (*lineptr)
 218         {
 219           const char* dirname = reinterpret_cast<const char*>(lineptr);
 220           gold_assert(dirindex
 221                       == static_cast<int>(this->directories_.back().size()));
 222           this->directories_.back().push_back(dirname);
 223           lineptr += this->directories_.back().back().size() + 1;
 224           dirindex++;
 225         }
 226     }
 227   lineptr++;
 228
 229   // It is also legal for the file entry table to be empty.
 230   if (*lineptr)
 231     {
 232       int fileindex = 1;
 233       size_t len;
 234       while (*lineptr)
 235         {
 236           const char* filename = reinterpret_cast<const char*>(lineptr);
 237           lineptr += strlen(filename) + 1;
 238
 239           uint64_t dirindex = read_unsigned_LEB_128(lineptr, &len);
 240           lineptr += len;
 241
 242           if (dirindex >= this->directories_.back().size())
 243             dirindex = 0;
 244           int dirindexi = static_cast<int>(dirindex);
 245
 246           read_unsigned_LEB_128(lineptr, &len);   // mod_time
 247           lineptr += len;
 248
 249           read_unsigned_LEB_128(lineptr, &len);   // filelength
 250           lineptr += len;
 251
 252           gold_assert(fileindex
 253                       == static_cast<int>(this->files_.back().size()));
 254           this->files_.back().push_back(std::make_pair(dirindexi, filename));
 255           fileindex++;
 256         }
 257     }
 258   lineptr++;
 259
 260   return lineptr;
 261 }
 262
 263 // Process a single opcode in the .debug.line structure.
 264
 265 template<int size, bool big_endian>
 266 bool
 267 Sized_dwarf_line_info<size, big_endian>::process_one_opcode(
 268     const unsigned char* start, struct LineStateMachine* lsm, size_t* len)
 269 {
 270   size_t oplen = 0;
 271   size_t templen;
 272   unsigned char opcode = *start;
 273   oplen++;
 274   start++;
 275
 276   // If the opcode is great than the opcode_base, it is a special
 277   // opcode. Most line programs consist mainly of special opcodes.
 278   if (opcode >= header_.opcode_base)
 279     {
 280       opcode -= header_.opcode_base;
 281       const int advance_address = ((opcode / header_.line_range)
 282                                    * header_.min_insn_length);
 283       lsm->address += advance_address;
 284
 285       const int advance_line = ((opcode % header_.line_range)
 286                                 + header_.line_base);
 287       lsm->line_num += advance_line;
 288       lsm->basic_block = true;
 289       *len = oplen;
 290       return true;
 291     }
 292
 293   // Otherwise, we have the regular opcodes
 294   switch (opcode)
 295     {
 296     case elfcpp::DW_LNS_copy:
 297       lsm->basic_block = false;
 298       *len = oplen;
 299       return true;
 300
 301     case elfcpp::DW_LNS_advance_pc:
 302       {
 303         const uint64_t advance_address
 304             = read_unsigned_LEB_128(start, &templen);
 305         oplen += templen;
 306         lsm->address += header_.min_insn_length * advance_address;
 307       }
 308       break;
 309
 310     case elfcpp::DW_LNS_advance_line:
 311       {
 312         const uint64_t advance_line = read_signed_LEB_128(start, &templen);
 313         oplen += templen;
 314         lsm->line_num += advance_line;
 315       }
 316       break;
 317
 318     case elfcpp::DW_LNS_set_file:
 319       {
 320         const uint64_t fileno = read_unsigned_LEB_128(start, &templen);
 321         oplen += templen;
 322         lsm->file_num = fileno;
 323       }
 324       break;
 325
 326     case elfcpp::DW_LNS_set_column:
 327       {
 328         const uint64_t colno = read_unsigned_LEB_128(start, &templen);
 329         oplen += templen;
 330         lsm->column_num = colno;
 331       }
 332       break;
 333
 334     case elfcpp::DW_LNS_negate_stmt:
 335       lsm->is_stmt = !lsm->is_stmt;
 336       break;
 337
 338     case elfcpp::DW_LNS_set_basic_block:
 339       lsm->basic_block = true;
 340       break;
 341
 342     case elfcpp::DW_LNS_fixed_advance_pc:
 343       {
 344         int advance_address;
 345         advance_address = elfcpp::Swap_unaligned<16, big_endian>::readval(start);
 346         oplen += 2;
 347         lsm->address += advance_address;
 348       }
 349       break;
 350
 351     case elfcpp::DW_LNS_const_add_pc:
 352       {
 353         const int advance_address = (header_.min_insn_length
 354                                      * ((255 - header_.opcode_base)
 355                                         / header_.line_range));
 356         lsm->address += advance_address;
 357       }
 358       break;
 359
 360     case elfcpp::DW_LNS_extended_op:
 361       {
 362         const uint64_t extended_op_len
 363             = read_unsigned_LEB_128(start, &templen);
 364         start += templen;
 365         oplen += templen + extended_op_len;
 366
 367         const unsigned char extended_op = *start;
 368         start++;
 369
 370         switch (extended_op)
 371           {
 372           case elfcpp::DW_LNE_end_sequence:
 373             // This means that the current byte is the one immediately
 374             // after a set of instructions.  Record the current line
 375             // for up to one less than the current address.
 376             lsm->line_num = -1;
 377             lsm->end_sequence = true;
 378             *len = oplen;
 379             return true;
 380
 381           case elfcpp::DW_LNE_set_address:
 382             {
 383               lsm->address =
 384                 elfcpp::Swap_unaligned<size, big_endian>::readval(start);
 385               typename Reloc_map::const_iterator it
 386                   = this->reloc_map_.find(start - this->buffer_);
 387               if (it != reloc_map_.end())
 388                 {
 389                   // If this is a SHT_RELA section, then ignore the
 390                   // section contents.  This assumes that this is a
 391                   // straight reloc which just uses the reloc addend.
 392                   // The reloc addend has already been included in the
 393                   // symbol value.
 394                   if (this->track_relocs_type_ == elfcpp::SHT_RELA)
 395                     lsm->address = 0;
 396                   // Add in the symbol value.
 397                   lsm->address += it->second.second;
 398                   lsm->shndx = it->second.first;
 399                 }
 400               else
 401                 {
 402                   // If we're a normal .o file, with relocs, every
 403                   // set_address should have an associated relocation.
 404                   if (this->input_is_relobj())
 405                     this->data_valid_ = false;
 406                 }
 407               break;
 408             }
 409           case elfcpp::DW_LNE_define_file:
 410             {
 411               const char* filename  = reinterpret_cast<const char*>(start);
 412               templen = strlen(filename) + 1;
 413               start += templen;
 414
 415               uint64_t dirindex = read_unsigned_LEB_128(start, &templen);
 416
 417               if (dirindex >= this->directories_.back().size())
 418                 dirindex = 0;
 419               int dirindexi = static_cast<int>(dirindex);
 420
 421               // This opcode takes two additional ULEB128 parameters
 422               // (mod_time and filelength), but we don't use those
 423               // values.  Because OPLEN already tells us how far to
 424               // skip to the next opcode, we don't need to read
 425               // them at all.
 426
 427               this->files_.back().push_back(std::make_pair(dirindexi,
 428                                                            filename));
 429             }
 430             break;
 431           }
 432       }
 433       break;
 434
 435     default:
 436       {
 437         // Ignore unknown opcode  silently
 438         for (int i = 0; i < header_.std_opcode_lengths[opcode]; i++)
 439           {
 440             size_t templen;
 441             read_unsigned_LEB_128(start, &templen);
 442             start += templen;
 443             oplen += templen;
 444           }
 445       }
 446       break;
 447   }
 448   *len = oplen;
 449   return false;
 450 }
 451
 452 // Read the debug information at LINEPTR and store it in the line
 453 // number map.
 454
 455 template<int size, bool big_endian>
 456 unsigned const char*
 457 Sized_dwarf_line_info<size, big_endian>::read_lines(unsigned const char* lineptr,
 458                                                     unsigned int shndx)
 459 {
 460   struct LineStateMachine lsm;
 461
 462   // LENGTHSTART is the place the length field is based on.  It is the
 463   // point in the header after the initial length field.
 464   const unsigned char* lengthstart = buffer_;
 465
 466   // In 64 bit dwarf, the initial length is 12 bytes, because of the
 467   // 0xffffffff at the start.
 468   if (header_.offset_size == 8)
 469     lengthstart += 12;
 470   else
 471     lengthstart += 4;
 472
 473   while (lineptr < lengthstart + header_.total_length)
 474     {
 475       ResetLineStateMachine(&lsm, header_.default_is_stmt);
 476       while (!lsm.end_sequence)
 477         {
 478           size_t oplength;
 479           bool add_line = this->process_one_opcode(lineptr, &lsm, &oplength);
 480           if (add_line
 481               && (shndx == -1U || lsm.shndx == -1U || shndx == lsm.shndx))
 482             {
 483               Offset_to_lineno_entry entry
 484                   = { static_cast<off_t>(lsm.address),
 485                       this->current_header_index_,
 486                       static_cast<unsigned int>(lsm.file_num),
 487                       true, lsm.line_num };
 488               std::vector<Offset_to_lineno_entry>&
 489                 map(this->line_number_map_[lsm.shndx]);
 490               // If we see two consecutive entries with the same
 491               // offset and a real line number, then mark the first
 492               // one as non-canonical.
 493               if (!map.empty()
 494                   && (map.back().offset == static_cast<off_t>(lsm.address))
 495                   && lsm.line_num != -1
 496                   && map.back().line_num != -1)
 497                 map.back().last_line_for_offset = false;
 498               map.push_back(entry);
 499             }
 500           lineptr += oplength;
 501         }
 502     }
 503
 504   return lengthstart + header_.total_length;
 505 }
 506
 507 // Looks in the symtab to see what section a symbol is in.
 508
 509 template<int size, bool big_endian>
 510 unsigned int
 511 Sized_dwarf_line_info<size, big_endian>::symbol_section(
 512     Object* object,
 513     unsigned int sym,
 514     typename elfcpp::Elf_types<size>::Elf_Addr* value,
 515     bool* is_ordinary)
 516 {
 517   const int symsize = elfcpp::Elf_sizes<size>::sym_size;
 518   gold_assert(sym * symsize < this->symtab_buffer_size_);
 519   elfcpp::Sym<size, big_endian> elfsym(this->symtab_buffer_ + sym * symsize);
 520   *value = elfsym.get_st_value();
 521   return object->adjust_sym_shndx(sym, elfsym.get_st_shndx(), is_ordinary);
 522 }
 523
 524 // Read the relocations into a Reloc_map.
 525
 526 template<int size, bool big_endian>
 527 void
 528 Sized_dwarf_line_info<size, big_endian>::read_relocs(Object* object)
 529 {
 530   if (this->symtab_buffer_ == NULL)
 531     return;
 532
 533   typename elfcpp::Elf_types<size>::Elf_Addr value;
 534   off_t reloc_offset;
 535   while ((reloc_offset = this->track_relocs_.next_offset()) != -1)
 536     {
 537       const unsigned int sym = this->track_relocs_.next_symndx();
 538
 539       bool is_ordinary;
 540       const unsigned int shndx = this->symbol_section(object, sym, &value,
 541                                                       &is_ordinary);
 542
 543       // There is no reason to record non-ordinary section indexes, or
 544       // SHN_UNDEF, because they will never match the real section.
 545       if (is_ordinary && shndx != elfcpp::SHN_UNDEF)
 546         {
 547           value += this->track_relocs_.next_addend();
 548           this->reloc_map_[reloc_offset] = std::make_pair(shndx, value);
 549         }
 550
 551       this->track_relocs_.advance(reloc_offset + 1);
 552     }
 553 }
 554
 555 // Read the line number info.
 556
 557 template<int size, bool big_endian>
 558 void
 559 Sized_dwarf_line_info<size, big_endian>::read_line_mappings(Object* object,
 560                                                             unsigned int shndx)
 561 {
 562   gold_assert(this->data_valid_ == true);
 563
 564   this->read_relocs(object);
 565   while (this->buffer_ < this->buffer_end_)
 566     {
 567       const unsigned char* lineptr = this->buffer_;
 568       lineptr = this->read_header_prolog(lineptr);
 569       lineptr = this->read_header_tables(lineptr);
 570       lineptr = this->read_lines(lineptr, shndx);
 571       this->buffer_ = lineptr;
 572     }
 573
 574   // Sort the lines numbers, so addr2line can use binary search.
 575   for (typename Lineno_map::iterator it = line_number_map_.begin();
 576        it != line_number_map_.end();
 577        ++it)
 578     // Each vector needs to be sorted by offset.
 579     std::sort(it->second.begin(), it->second.end());
 580 }
 581
 582 // Some processing depends on whether the input is a .o file or not.
 583 // For instance, .o files have relocs, and have .debug_lines
 584 // information on a per section basis.  .so files, on the other hand,
 585 // lack relocs, and offsets are unique, so we can ignore the section
 586 // information.
 587
 588 template<int size, bool big_endian>
 589 bool
 590 Sized_dwarf_line_info<size, big_endian>::input_is_relobj()
 591 {
 592   // Only .o files have relocs and the symtab buffer that goes with them.
 593   return this->symtab_buffer_ != NULL;
 594 }
 595
 596 // Given an Offset_to_lineno_entry vector, and an offset, figure out
 597 // if the offset points into a function according to the vector (see
 598 // comments below for the algorithm).  If it does, return an iterator
 599 // into the vector that points to the line-number that contains that
 600 // offset.  If not, it returns vector::end().
 601
 602 static std::vector<Offset_to_lineno_entry>::const_iterator
 603 offset_to_iterator(const std::vector<Offset_to_lineno_entry>* offsets,
 604                    off_t offset)
 605 {
 606   const Offset_to_lineno_entry lookup_key = { offset, 0, 0, true, 0 };
 607
 608   // lower_bound() returns the smallest offset which is >= lookup_key.
 609   // If no offset in offsets is >= lookup_key, returns end().
 610   std::vector<Offset_to_lineno_entry>::const_iterator it
 611       = std::lower_bound(offsets->begin(), offsets->end(), lookup_key);
 612
 613   // This code is easiest to understand with a concrete example.
 614   // Here's a possible offsets array:
 615   // {{offset = 3211, header_num = 0, file_num = 1, last, line_num = 16},  // 0
 616   //  {offset = 3224, header_num = 0, file_num = 1, last, line_num = 20},  // 1
 617   //  {offset = 3226, header_num = 0, file_num = 1, last, line_num = 22},  // 2
 618   //  {offset = 3231, header_num = 0, file_num = 1, last, line_num = 25},  // 3
 619   //  {offset = 3232, header_num = 0, file_num = 1, last, line_num = -1},  // 4
 620   //  {offset = 3232, header_num = 0, file_num = 1, last, line_num = 65},  // 5
 621   //  {offset = 3235, header_num = 0, file_num = 1, last, line_num = 66},  // 6
 622   //  {offset = 3236, header_num = 0, file_num = 1, last, line_num = -1},  // 7
 623   //  {offset = 5764, header_num = 0, file_num = 1, last, line_num = 48},  // 8
 624   //  {offset = 5764, header_num = 0, file_num = 1,!last, line_num = 47},  // 9
 625   //  {offset = 5765, header_num = 0, file_num = 1, last, line_num = 49},  // 10
 626   //  {offset = 5767, header_num = 0, file_num = 1, last, line_num = 50},  // 11
 627   //  {offset = 5768, header_num = 0, file_num = 1, last, line_num = 51},  // 12
 628   //  {offset = 5773, header_num = 0, file_num = 1, last, line_num = -1},  // 13
 629   //  {offset = 5787, header_num = 1, file_num = 1, last, line_num = 19},  // 14
 630   //  {offset = 5790, header_num = 1, file_num = 1, last, line_num = 20},  // 15
 631   //  {offset = 5793, header_num = 1, file_num = 1, last, line_num = 67},  // 16
 632   //  {offset = 5793, header_num = 1, file_num = 1, last, line_num = -1},  // 17
 633   //  {offset = 5793, header_num = 1, file_num = 1,!last, line_num = 66},  // 18
 634   //  {offset = 5795, header_num = 1, file_num = 1, last, line_num = 68},  // 19
 635   //  {offset = 5798, header_num = 1, file_num = 1, last, line_num = -1},  // 20
 636   // The entries with line_num == -1 mark the end of a function: the
 637   // associated offset is one past the last instruction in the
 638   // function.  This can correspond to the beginning of the next
 639   // function (as is true for offset 3232); alternately, there can be
 640   // a gap between the end of one function and the start of the next
 641   // (as is true for some others, most obviously from 3236->5764).
 642   //
 643   // Case 1: lookup_key has offset == 10.  lower_bound returns
 644   //         offsets[0].  Since it's not an exact match and we're
 645   //         at the beginning of offsets, we return end() (invalid).
 646   // Case 2: lookup_key has offset 10000.  lower_bound returns
 647   //         offset[21] (end()).  We return end() (invalid).
 648   // Case 3: lookup_key has offset == 3211.  lower_bound matches
 649   //         offsets[0] exactly, and that's the entry we return.
 650   // Case 4: lookup_key has offset == 3232.  lower_bound returns
 651   //         offsets[4].  That's an exact match, but indicates
 652   //         end-of-function.  We check if offsets[5] is also an
 653   //         exact match but not end-of-function.  It is, so we
 654   //         return offsets[5].
 655   // Case 5: lookup_key has offset == 3214.  lower_bound returns
 656   //         offsets[1].  Since it's not an exact match, we back
 657   //         up to the offset that's < lookup_key, offsets[0].
 658   //         We note offsets[0] is a valid entry (not end-of-function),
 659   //         so that's the entry we return.
 660   // Case 6: lookup_key has offset == 4000.  lower_bound returns
 661   //         offsets[8].  Since it's not an exact match, we back
 662   //         up to offsets[7].  Since offsets[7] indicates
 663   //         end-of-function, we know lookup_key is between
 664   //         functions, so we return end() (not a valid offset).
 665   // Case 7: lookup_key has offset == 5794.  lower_bound returns
 666   //         offsets[19].  Since it's not an exact match, we back
 667   //         up to offsets[16].  Note we back up to the *first*
 668   //         entry with offset 5793, not just offsets[19-1].
 669   //         We note offsets[16] is a valid entry, so we return it.
 670   //         If offsets[16] had had line_num == -1, we would have
 671   //         checked offsets[17].  The reason for this is that
 672   //         16 and 17 can be in an arbitrary order, since we sort
 673   //         only by offset and last_line_for_offset.  (Note it
 674   //         doesn't help to use line_number as a tertiary sort key,
 675   //         since sometimes we want the -1 to be first and sometimes
 676   //         we want it to be last.)
 677
 678   // This deals with cases (1) and (2).
 679   if ((it == offsets->begin() && offset < it->offset)
 680       || it == offsets->end())
 681     return offsets->end();
 682
 683   // This deals with cases (3) and (4).
 684   if (offset == it->offset)
 685     {
 686       while (it != offsets->end()
 687              && it->offset == offset
 688              && it->line_num == -1)
 689         ++it;
 690       if (it == offsets->end() || it->offset != offset)
 691         return offsets->end();
 692       else
 693         return it;
 694     }
 695
 696   // This handles the first part of case (7) -- we back up to the
 697   // *first* entry that has the offset that's behind us.
 698   gold_assert(it != offsets->begin());
 699   std::vector<Offset_to_lineno_entry>::const_iterator range_end = it;
 700   --it;
 701   const off_t range_value = it->offset;
 702   while (it != offsets->begin() && (it-1)->offset == range_value)
 703     --it;
 704
 705   // This handles cases (5), (6), and (7): if any entry in the
 706   // equal_range [it, range_end) has a line_num != -1, it's a valid
 707   // match.  If not, we're not in a function.  The line number we saw
 708   // last for an offset will be sorted first, so it'll get returned if
 709   // it's present.
 710   for (; it != range_end; ++it)
 711     if (it->line_num != -1)
 712       return it;
 713   return offsets->end();
 714 }
 715
 716 // Returns the canonical filename:lineno for the address passed in.
 717 // If other_lines is not NULL, appends the non-canonical lines
 718 // assigned to the same address.
 719
 720 template<int size, bool big_endian>
 721 std::string
 722 Sized_dwarf_line_info<size, big_endian>::do_addr2line(
 723     unsigned int shndx,
 724     off_t offset,
 725     std::vector<std::string>* other_lines)
 726 {
 727   if (this->data_valid_ == false)
 728     return "";
 729
 730   const std::vector<Offset_to_lineno_entry>* offsets;
 731   // If we do not have reloc information, then our input is a .so or
 732   // some similar data structure where all the information is held in
 733   // the offset.  In that case, we ignore the input shndx.
 734   if (this->input_is_relobj())
 735     offsets = &this->line_number_map_[shndx];
 736   else
 737     offsets = &this->line_number_map_[-1U];
 738   if (offsets->empty())
 739     return "";
 740
 741   typename std::vector<Offset_to_lineno_entry>::const_iterator it
 742       = offset_to_iterator(offsets, offset);
 743   if (it == offsets->end())
 744     return "";
 745
 746   std::string result = this->format_file_lineno(*it);
 747   if (other_lines != NULL)
 748     for (++it; it != offsets->end() && it->offset == offset; ++it)
 749       {
 750         if (it->line_num == -1)
 751           continue;  // The end of a previous function.
 752         other_lines->push_back(this->format_file_lineno(*it));
 753       }
 754   return result;
 755 }
 756
 757 // Convert the file_num + line_num into a string.
 758
 759 template<int size, bool big_endian>
 760 std::string
 761 Sized_dwarf_line_info<size, big_endian>::format_file_lineno(
 762     const Offset_to_lineno_entry& loc) const
 763 {
 764   std::string ret;
 765
 766   gold_assert(loc.header_num < static_cast<int>(this->files_.size()));
 767   gold_assert(loc.file_num
 768               < static_cast<int>(this->files_[loc.header_num].size()));
 769   const std::pair<int, std::string>& filename_pair
 770       = this->files_[loc.header_num][loc.file_num];
 771   const std::string& filename = filename_pair.second;
 772
 773   gold_assert(loc.header_num < static_cast<int>(this->directories_.size()));
 774   gold_assert(filename_pair.first
 775               < static_cast<int>(this->directories_[loc.header_num].size()));
 776   const std::string& dirname
 777       = this->directories_[loc.header_num][filename_pair.first];
 778
 779   if (!dirname.empty())
 780     {
 781       ret += dirname;
 782       ret += "/";
 783     }
 784   ret += filename;
 785   if (ret.empty())
 786     ret = "(unknown)";
 787
 788   char buffer[64];   // enough to hold a line number
 789   snprintf(buffer, sizeof(buffer), "%d", loc.line_num);
 790   ret += ":";
 791   ret += buffer;
 792
 793   return ret;
 794 }
 795
 796 // Dwarf_line_info routines.
 797
 798 static unsigned int next_generation_count = 0;
 799
 800 struct Addr2line_cache_entry
 801 {
 802   Object* object;
 803   unsigned int shndx;
 804   Dwarf_line_info* dwarf_line_info;
 805   unsigned int generation_count;
 806   unsigned int access_count;
 807
 808   Addr2line_cache_entry(Object* o, unsigned int s, Dwarf_line_info* d)
 809       : object(o), shndx(s), dwarf_line_info(d),
 810         generation_count(next_generation_count), access_count(0)
 811   {
 812     if (next_generation_count < (1U << 31))
 813       ++next_generation_count;
 814   }
 815 };
// We expect this cache to be small, so don't bother with a hashtable
// or priority queue or anything: just use a simple vector.
// Each entry's dwarf_line_info pointer is deleted when the entry is
// evicted by one_addr2line or when clear_addr2line_cache runs.
static std::vector<Addr2line_cache_entry> addr2line_cache;
 819
 820 std::string
 821 Dwarf_line_info::one_addr2line(Object* object,
 822                                unsigned int shndx, off_t offset,
 823                                size_t cache_size,
 824                                std::vector<std::string>* other_lines)
 825 {
 826   Dwarf_line_info* lineinfo = NULL;
 827   std::vector<Addr2line_cache_entry>::iterator it;
 828
 829   // First, check the cache.  If we hit, update the counts.
 830   for (it = addr2line_cache.begin(); it != addr2line_cache.end(); ++it)
 831     {
 832       if (it->object == object && it->shndx == shndx)
 833         {
 834           lineinfo = it->dwarf_line_info;
 835           it->generation_count = next_generation_count;
 836           // We cap generation_count at 2^31 -1 to avoid overflow.
 837           if (next_generation_count < (1U << 31))
 838             ++next_generation_count;
 839           // We cap access_count at 31 so 2^access_count doesn't overflow
 840           if (it->access_count < 31)
 841             ++it->access_count;
 842           break;
 843         }
 844     }
 845
 846   // If we don't hit the cache, create a new object and insert into the
 847   // cache.
 848   if (lineinfo == NULL)
 849   {
 850     switch (parameters->size_and_endianness())
 851       {
 852 #ifdef HAVE_TARGET_32_LITTLE
 853         case Parameters::TARGET_32_LITTLE:
 854           lineinfo = new Sized_dwarf_line_info<32, false>(object, shndx); break;
 855 #endif
 856 #ifdef HAVE_TARGET_32_BIG
 857         case Parameters::TARGET_32_BIG:
 858           lineinfo = new Sized_dwarf_line_info<32, true>(object, shndx); break;
 859 #endif
 860 #ifdef HAVE_TARGET_64_LITTLE
 861         case Parameters::TARGET_64_LITTLE:
 862           lineinfo = new Sized_dwarf_line_info<64, false>(object, shndx); break;
 863 #endif
 864 #ifdef HAVE_TARGET_64_BIG
 865         case Parameters::TARGET_64_BIG:
 866           lineinfo = new Sized_dwarf_line_info<64, true>(object, shndx); break;
 867 #endif
 868         default:
 869           gold_unreachable();
 870       }
 871     addr2line_cache.push_back(Addr2line_cache_entry(object, shndx, lineinfo));
 872   }
 873
 874   // Now that we have our object, figure out the answer
 875   std::string retval = lineinfo->addr2line(shndx, offset, other_lines);
 876
 877   // Finally, if our cache has grown too big, delete old objects.  We
 878   // assume the common (probably only) case is deleting only one object.
 879   // We use a pretty simple scheme to evict: function of LRU and MFU.
 880   while (addr2line_cache.size() > cache_size)
 881     {
 882       unsigned int lowest_score = ~0U;
 883       std::vector<Addr2line_cache_entry>::iterator lowest
 884           = addr2line_cache.end();
 885       for (it = addr2line_cache.begin(); it != addr2line_cache.end(); ++it)
 886         {
 887           const unsigned int score = (it->generation_count
 888                                       + (1U << it->access_count));
 889           if (score < lowest_score)
 890             {
 891               lowest_score = score;
 892               lowest = it;
 893             }
 894         }
 895       if (lowest != addr2line_cache.end())
 896         {
 897           delete lowest->dwarf_line_info;
 898           addr2line_cache.erase(lowest);
 899         }
 900     }
 901
 902   return retval;
 903 }
 904
 905 void
 906 Dwarf_line_info::clear_addr2line_cache()
 907 {
 908   for (std::vector<Addr2line_cache_entry>::iterator it = addr2line_cache.begin();
 909        it != addr2line_cache.end();
 910        ++it)
 911     delete it->dwarf_line_info;
 912   addr2line_cache.clear();
 913 }
 914
 915 #ifdef HAVE_TARGET_32_LITTLE
 916 template
 917 class Sized_dwarf_line_info<32, false>;
 918 #endif
 919
 920 #ifdef HAVE_TARGET_32_BIG
 921 template
 922 class Sized_dwarf_line_info<32, true>;
 923 #endif
 924
 925 #ifdef HAVE_TARGET_64_LITTLE
 926 template
 927 class Sized_dwarf_line_info<64, false>;
 928 #endif
 929
 930 #ifdef HAVE_TARGET_64_BIG
 931 template
 932 class Sized_dwarf_line_info<64, true>;
 933 #endif
 934
 935 } // End namespace gold.