From d4381153ea63e9458ffb9dd20ea92fb35d4e3042 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 11 Jul 2022 16:56:04 +0200 Subject: [PATCH] [lldb/libc++] Simplify the libc++ string formatter Precise string layout has changed a lot recently, but a long of these changes did not have any effect on the usages of its fields -- e.g. introduction/removal of an anonymous struct or union does not change the way one can access the field in C++. Our name-based variable lookup rules (deliberately) copy the C++ semantics, which means these changes would have been invisible to the them, if only we were using name-based lookup. This patch replaces the opaque child index accesses with name-based lookups, which allows us to greatly simplify the data formatter code. The formatter continues to support all the string layouts that it previously supported. It is unclear why the formatter was not using this approach from the beginning. I would speculate that the original version was working around some (now fixed) issue with anonymous members or base classes, and the subsequent revisions stuck with that approach out of inertia. Differential Revision: https://reviews.llvm.org/D129490 --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp | 112 ++++++++-------------- 1 file changed, 38 insertions(+), 74 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index eaaa164..6c651c6 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -547,101 +547,77 @@ bool lldb_private::formatters::LibcxxContainerSummaryProvider( } /// The field layout in a libc++ string (cap, side, data or data, size, cap). -enum LibcxxStringLayoutMode { - eLibcxxStringLayoutModeCSD = 0, - eLibcxxStringLayoutModeDSC = 1, - eLibcxxStringLayoutModeInvalid = 0xffff -}; +namespace { +enum class StringLayout { CSD, DSC }; +} /// Determine the size in bytes of \p valobj (a libc++ std::string object) and /// extract its data payload. Return the size + payload pair. // TODO: Support big-endian architectures. static llvm::Optional> ExtractLibcxxStringInfo(ValueObject &valobj) { - ValueObjectSP dataval_sp(valobj.GetChildAtIndexPath({0, 0, 0, 0})); - if (!dataval_sp) + ValueObjectSP valobj_r_sp = + valobj.GetChildMemberWithName(ConstString("__r_"), /*can_create=*/true); + if (!valobj_r_sp) return {}; - if (!dataval_sp->GetError().Success()) + + // __r_ is a compressed_pair of the actual data and the allocator. The data we + // want is in the first base class. + ValueObjectSP valobj_r_base_sp = + valobj_r_sp->GetChildAtIndex(0, /*can_create=*/true); + if (!valobj_r_base_sp) return {}; - ValueObjectSP layout_decider( - dataval_sp->GetChildAtIndexPath(llvm::ArrayRef({0, 0}))); + ValueObjectSP valobj_rep_sp = valobj_r_base_sp->GetChildMemberWithName( + ConstString("__value_"), /*can_create=*/true); + if (!valobj_rep_sp) + return {}; - // this child should exist - if (!layout_decider) + ValueObjectSP l = valobj_rep_sp->GetChildMemberWithName(ConstString("__l"), + /*can_create=*/true); + if (!l) return {}; - ConstString g_data_name("__data_"); - ConstString g_size_name("__size_"); + StringLayout layout = l->GetIndexOfChildWithName(ConstString("__data_")) == 0 + ? StringLayout::DSC + : StringLayout::CSD; + bool short_mode = false; // this means the string is in short-mode and the // data is stored inline bool using_bitmasks = true; // Whether the class uses bitmasks for the mode // flag (pre-D123580). uint64_t size; - LibcxxStringLayoutMode layout = (layout_decider->GetName() == g_data_name) - ? eLibcxxStringLayoutModeDSC - : eLibcxxStringLayoutModeCSD; uint64_t size_mode_value = 0; - ValueObjectSP short_sp(dataval_sp->GetChildAtIndex(1, true)); + ValueObjectSP short_sp = valobj_rep_sp->GetChildMemberWithName( + ConstString("__s"), /*can_create=*/true); if (!short_sp) return {}; - ValueObjectSP short_fields_sp; ValueObjectSP is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true); - if (is_long) { - short_fields_sp = short_sp; - } else { - // After D128285, we need to access the `__is_long_` and `__size_` fields - // from a packed anonymous struct - short_fields_sp = short_sp->GetChildAtIndex(0, true); - is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true); - } + ValueObjectSP size_sp = + short_sp->GetChildAtNamePath({ConstString("__size_")}); + if (!size_sp) + return {}; if (is_long) { using_bitmasks = false; short_mode = !is_long->GetValueAsUnsigned(/*fail_value=*/0); - if (ValueObjectSP size_member = - dataval_sp->GetChildAtNamePath({ConstString("__s"), ConstString("__size_")})) - size = size_member->GetValueAsUnsigned(/*fail_value=*/0); - else - return {}; - } else if (layout == eLibcxxStringLayoutModeDSC) { - llvm::SmallVector size_mode_locations[] = { - {1, 2}, // Post-c3d0205ee771 layout. This was in use for only a brief - // period, so we can delete it if it becomes a burden. - {1, 1, 0}, - {1, 1, 1}, - }; - ValueObjectSP size_mode; - for (llvm::ArrayRef loc : size_mode_locations) { - size_mode = dataval_sp->GetChildAtIndexPath(loc); - if (size_mode && size_mode->GetName() == g_size_name) - break; - } - - if (!size_mode) - return {}; - - size_mode_value = (size_mode->GetValueAsUnsigned(0)); - short_mode = ((size_mode_value & 0x80) == 0); + size = size_sp->GetValueAsUnsigned(/*fail_value=*/0); } else { - ValueObjectSP size_mode(dataval_sp->GetChildAtIndexPath({1, 0, 0})); - if (!size_mode) - return {}; - - size_mode_value = (size_mode->GetValueAsUnsigned(0)); - short_mode = ((size_mode_value & 1) == 0); + // The string mode is encoded in the size field. + size_mode_value = size_sp->GetValueAsUnsigned(0); + uint8_t mode_mask = layout == StringLayout::DSC ? 0x80 : 1; + short_mode = (size_mode_value & mode_mask) == 0; } if (short_mode) { ValueObjectSP location_sp = - short_sp->GetChildMemberWithName(g_data_name, true); + short_sp->GetChildMemberWithName(ConstString("__data_"), true); if (using_bitmasks) - size = (layout == eLibcxxStringLayoutModeDSC) - ? size_mode_value - : ((size_mode_value >> 1) % 256); + size = (layout == StringLayout::DSC) ? size_mode_value + : ((size_mode_value >> 1) % 256); // When the small-string optimization takes place, the data must fit in the // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's @@ -656,10 +632,6 @@ ExtractLibcxxStringInfo(ValueObject &valobj) { return std::make_pair(size, location_sp); } - ValueObjectSP l(dataval_sp->GetChildAtIndex(0, true)); - if (!l) - return {}; - // we can use the layout_decider object as the data pointer ValueObjectSP location_sp = l->GetChildMemberWithName(ConstString("__data_"), /*can_create=*/true); @@ -667,19 +639,11 @@ ExtractLibcxxStringInfo(ValueObject &valobj) { l->GetChildMemberWithName(ConstString("__size_"), /*can_create=*/true); ValueObjectSP capacity_vo = l->GetChildMemberWithName(ConstString("__cap_"), /*can_create=*/true); - if (!capacity_vo) { - // After D128285, we need to access the `__cap_` field from a packed - // anonymous struct - if (ValueObjectSP packed_fields_sp = l->GetChildAtIndex(0, true)) { - ValueObjectSP capacity_vo = packed_fields_sp->GetChildMemberWithName( - ConstString("__cap_"), /*can_create=*/true); - } - } if (!size_vo || !location_sp || !capacity_vo) return {}; size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); uint64_t capacity = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); - if (!using_bitmasks && layout == eLibcxxStringLayoutModeCSD) + if (!using_bitmasks && layout == StringLayout::CSD) capacity *= 2; if (size == LLDB_INVALID_OFFSET || capacity == LLDB_INVALID_OFFSET || capacity < size) -- 2.7.4