using namespace lld;
using namespace lld::macho;
+#define COMMON_ENCODINGS_MAX 127
+#define COMPACT_ENCODINGS_MAX 256
+
+#define SECOND_LEVEL_PAGE_BYTES 4096
+#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
+#define REGULAR_SECOND_LEVEL_ENTRIES_MAX \
+ ((SECOND_LEVEL_PAGE_BYTES - \
+ sizeof(unwind_info_regular_second_level_page_header)) / \
+ sizeof(unwind_info_regular_second_level_entry))
+#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \
+ ((SECOND_LEVEL_PAGE_BYTES - \
+ sizeof(unwind_info_compressed_second_level_page_header)) / \
+ sizeof(uint32_t))
+
+#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
+#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \
+ UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
+
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// Rather than sort & fold the 32-byte entries directly, we create a
// vector of pointers to entries and sort & fold that instead.
cuPtrVector.reserve(cuCount);
- for (const auto &cuEntry : cuVector)
+ for (const CompactUnwindEntry64 &cuEntry : cuVector)
cuPtrVector.emplace_back(&cuEntry);
std::sort(cuPtrVector.begin(), cuPtrVector.end(),
[](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
cuPtrVector.erase(foldWrite, cuPtrVector.end());
// Count frequencies of the folded encodings
- llvm::DenseMap<compact_unwind_encoding_t, size_t> encodingFrequencies;
+ EncodingMap encodingFrequencies;
for (auto cuPtrEntry : cuPtrVector)
encodingFrequencies[cuPtrEntry->encoding]++;
- if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
- error("TODO(gkm): handle common encodings table overflow");
- // Make a table of encodings, sorted by descending frequency
+ // Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
commonEncodings.emplace_back(frequency);
std::sort(commonEncodings.begin(), commonEncodings.end(),
return a.second > b.second;
});
- // Split folded encodings into pages, limited by capacity of a page
- // and the 24-bit range of function offset
- //
- // Record the page splits as a vector of iterators on cuPtrVector
- // such that successive elements form a semi-open interval. E.g.,
- // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
- //
- // Note that pageBounds.size() is one greater than the number of
- // pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
- pageBounds.push_back(cuPtrVector.cbegin());
- // TODO(gkm): cut 1st page entries short to accommodate section headers ???
- CompactUnwindEntry64 cuEntryKey;
- for (size_t i = 0;;) {
- // Limit the search to entries that can fit within a 4 KiB page.
- const auto pageBegin = pageBounds[0] + i;
- const auto pageMax =
- pageBounds[0] +
- std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX,
- cuPtrVector.size());
- // Exclude entries with functionOffset that would overflow 24 bits
- cuEntryKey.functionAddress = (*pageBegin)->functionAddress +
- UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
- const auto pageBreak = std::lower_bound(
- pageBegin, pageMax, &cuEntryKey,
- [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
- return a->functionAddress < b->functionAddress;
- });
- pageBounds.push_back(pageBreak);
- if (pageBreak == cuPtrVector.cend())
- break;
- i = pageBreak - cuPtrVector.cbegin();
+ // Truncate the vector to 127 elements.
+ // Common encoding indexes are limited to 0..126, while enconding
+ // indexes 127..255 are local to each second-level page
+ if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
+ commonEncodings.resize(COMMON_ENCODINGS_MAX);
+
+ // Create a map from encoding to common-encoding-table index
+ for (size_t i = 0; i < commonEncodings.size(); i++)
+ commonEncodingIndexes[commonEncodings[i].first] = i;
+
+ // Split folded encodings into pages, where each page is limited by ...
+ // (a) 4 KiB capacity
+ // (b) 24-bit difference between first & final function address
+ // (c) 8-bit compact-encoding-table index,
+ // for which 0..126 references the global common-encodings table,
+ // and 127..255 references a local per-second-level-page table.
+ // First we try the compact format and determine how many entries fit.
+ // If more entries fit in the regular format, we use that.
+ for (size_t i = 0; i < cuPtrVector.size();) {
+ secondLevelPages.emplace_back();
+ auto &page = secondLevelPages.back();
+ page.entryIndex = i;
+ uintptr_t functionAddressMax =
+ cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
+ size_t n = commonEncodings.size();
+ size_t wordsRemaining =
+ SECOND_LEVEL_PAGE_WORDS -
+ sizeof(unwind_info_compressed_second_level_page_header) /
+ sizeof(uint32_t);
+ while (wordsRemaining >= 1 && i < cuPtrVector.size()) {
+ const auto *cuPtr = cuPtrVector[i];
+ if (cuPtr->functionAddress >= functionAddressMax) {
+ break;
+ } else if (commonEncodingIndexes.count(cuPtr->encoding) ||
+ page.localEncodingIndexes.count(cuPtr->encoding)) {
+ i++;
+ wordsRemaining--;
+ } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
+ page.localEncodings.emplace_back(cuPtr->encoding);
+ page.localEncodingIndexes[cuPtr->encoding] = n++;
+ i++;
+ wordsRemaining -= 2;
+ } else {
+ break;
+ }
+ }
+ page.entryCount = i - page.entryIndex;
+
+ // If this is not the final page, see if it's possible to fit more
+ // entries by using the regular format. This can happen when there
+ // are many unique encodings, and we we saturated the local
+ // encoding table early.
+ if (i < cuPtrVector.size() &&
+ page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
+ page.kind = UNWIND_SECOND_LEVEL_REGULAR;
+ page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
+ cuPtrVector.size() - page.entryIndex);
+ i = page.entryIndex + page.entryCount;
+ } else {
+ page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
+ }
}
// compute size of __TEXT,__unwind_info section
sizeof(unwind_info_section_header) +
commonEncodings.size() * sizeof(uint32_t) +
personalities.size() * sizeof(uint32_t) +
- pageBounds.size() * sizeof(unwind_info_section_header_index_entry) +
+ // The extra second-level-page entry is for the sentinel
+ (secondLevelPages.size() + 1) *
+ sizeof(unwind_info_section_header_index_entry) +
lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
- unwindInfoSize = level2PagesOffset +
- (pageBounds.size() - 1) *
- sizeof(unwind_info_compressed_second_level_page_header) +
- cuPtrVector.size() * sizeof(uint32_t);
+ unwindInfoSize =
+ level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}
// All inputs are relocated and output addresses are known, so write!
uip->personalityArrayCount = personalities.size();
uip->indexSectionOffset = uip->personalityArraySectionOffset +
(uip->personalityArrayCount * sizeof(uint32_t));
- uip->indexCount = pageBounds.size();
+ uip->indexCount = secondLevelPages.size() + 1;
// Common encodings
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
*i32p++ = encoding.first;
// Personalities
- for (const auto &personality : personalities)
+ for (const uint32_t &personality : personalities)
*i32p++ = personality;
// Level-1 index
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
- for (size_t i = 0; i < pageBounds.size() - 1; i++) {
- iep->functionOffset = (*pageBounds[i])->functionAddress;
+ for (const SecondLevelPage &page : secondLevelPages) {
+ iep->functionOffset = cuPtrVector[page.entryIndex]->functionAddress;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset = lsdaOffset;
iep++;
- // TODO(gkm): pad to 4 KiB page boundary ???
- size_t entryCount = pageBounds[i + 1] - pageBounds[i];
- uint64_t pageSize = sizeof(unwind_info_section_header_index_entry) +
- entryCount * sizeof(uint32_t);
- l2PagesOffset += pageSize;
+ l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry64 &cuEnd = cuVector.back();
// LSDAs
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
- for (const auto &lsda : lsdaEntries) {
+ for (const unwind_info_section_header_lsda_index_entry &lsda : lsdaEntries) {
lep->functionOffset = lsda.functionOffset;
lep->lsdaOffset = lsda.lsdaOffset;
}
- // create map from encoding to common-encoding-table index compact
- // encoding entries use 7 bits to index the common-encoding table
- size_t i = 0;
- llvm::DenseMap<compact_unwind_encoding_t, size_t> commonEncodingIndexes;
- for (const auto &encoding : commonEncodings)
- commonEncodingIndexes[encoding.first] = i++;
-
// Level-2 pages
- auto *p2p =
- reinterpret_cast<unwind_info_compressed_second_level_page_header *>(lep);
- for (size_t i = 0; i < pageBounds.size() - 1; i++) {
- p2p->kind = UNWIND_SECOND_LEVEL_COMPRESSED;
- p2p->entryPageOffset =
- sizeof(unwind_info_compressed_second_level_page_header);
- p2p->entryCount = pageBounds[i + 1] - pageBounds[i];
- p2p->encodingsPageOffset =
- p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
- p2p->encodingsCount = 0;
- auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
- auto cuPtrVectorIt = pageBounds[i];
- uintptr_t functionAddressBase = (*cuPtrVectorIt)->functionAddress;
- while (cuPtrVectorIt < pageBounds[i + 1]) {
- const CompactUnwindEntry64 *cuep = *cuPtrVectorIt++;
- size_t cueIndex = commonEncodingIndexes.lookup(cuep->encoding);
- *ep++ = ((cueIndex << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
- (cuep->functionAddress - functionAddressBase));
+ auto *pp = reinterpret_cast<uint32_t *>(lep);
+ for (const SecondLevelPage &page : secondLevelPages) {
+ if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
+ uintptr_t functionAddressBase =
+ cuPtrVector[page.entryIndex]->functionAddress;
+ auto *p2p =
+ reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
+ pp);
+ p2p->kind = page.kind;
+ p2p->entryPageOffset =
+ sizeof(unwind_info_compressed_second_level_page_header);
+ p2p->entryCount = page.entryCount;
+ p2p->encodingsPageOffset =
+ p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
+ p2p->encodingsCount = page.localEncodings.size();
+ auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
+ for (size_t i = 0; i < page.entryCount; i++) {
+ const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
+ auto it = commonEncodingIndexes.find(cuep->encoding);
+ if (it == commonEncodingIndexes.end())
+ it = page.localEncodingIndexes.find(cuep->encoding);
+ *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
+ (cuep->functionAddress - functionAddressBase);
+ }
+ memcpy(ep, page.localEncodings.data(),
+ page.localEncodings.size() * sizeof(uint32_t));
+ } else {
+ auto *p2p =
+ reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
+ p2p->kind = page.kind;
+ p2p->entryPageOffset =
+ sizeof(unwind_info_regular_second_level_page_header);
+ p2p->entryCount = page.entryCount;
+ auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
+ for (size_t i = 0; i < page.entryCount; i++) {
+ const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
+ *ep++ = cuep->functionAddress;
+ *ep++ = cuep->encoding;
+ }
}
- p2p =
- reinterpret_cast<unwind_info_compressed_second_level_page_header *>(ep);
+ pp += SECOND_LEVEL_PAGE_WORDS;
}
- assert(getSize() ==
- static_cast<size_t>((reinterpret_cast<uint8_t *>(p2p) - buf)));
}