From 0662b6a7c1b3b04a4ca31a09af703c91c7aa9646 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Fri, 24 Nov 2017 23:30:04 +0000 Subject: [PATCH] Make strcmp_iw NOT ignore whitespace in the middle of tokens Currently "b func tion" manages to set a breakpoint at "function"! All these years I had never noticed this, but now that the linespec completer actually works, this easily happens by accident, with: "b func t<TAB>" expecting to get "thread", but getting instead: "b func tion" ... Also, this: "b rettypefunc" manages to set a breakpoint on "rettype func()". These things happen due to strcmp_iw "magic". Fix it by teaching strcmp_iw when it can skip whitespace. This required handling user-defined operators, and scope operators, complicating the code a bit, unfortunately. I added unit tests for all the corner cases I stumbled on, as I was developing this, and then in the end wrote a testsuite testcase covering many of the same things and more (to be added later). gdb/ChangeLog: 2017-11-24 Pedro Alves * cp-support.c (cp_symbol_name_matches_1): New, factored out from cp_fq_symbol_name_matches. Pass language_cplus to strncmp_iw_with_mode. (cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1. (selftests::test_cp_symbol_name_matches): New. (_initialize_cp_support): Register "cp_symbol_name_matches" selftests. * language.c (default_symbol_name_matcher): Pass language_minimal to strncmp_iw_with_mode. * utils.c: Include "cp-support.h" and <algorithm>. (valid_identifier_name_char, cp_skip_operator_token, skip_ws) (cp_is_operator): New functions. (strncmp_iw_with_mode): Use them. Add language parameter. Don't skip whitespace in the symbol name when the lookup name doesn't have spaces, and vice versa. (strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode. * utils.h (strncmp_iw_with_mode): Add language parameter. 
--- gdb/ChangeLog | 20 +++++ gdb/cp-support.c | 178 +++++++++++++++++++++++++++++++++++++++--- gdb/language.c | 2 +- gdb/utils.c | 233 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- gdb/utils.h | 18 ++++- 5 files changed, 429 insertions(+), 22 deletions(-) diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 26d5cd3..befce60 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,23 @@ +2017-11-24 Pedro Alves + + * cp-support.c (cp_symbol_name_matches_1): New, factored out from + cp_fq_symbol_name_matches. Pass language_cplus to + strncmp_iw_with_mode. + (cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1. + (selftests::test_cp_symbol_name_matches): New. + (_initialize_cp_support): Register "cp_symbol_name_matches" + selftests. + * language.c (default_symbol_name_matcher): Pass language_minimal + to strncmp_iw_with_mode. + * utils.c: Include "cp-support.h" and <algorithm>. + (valid_identifier_name_char, cp_skip_operator_token, skip_ws) + (cp_is_operator): New functions. + (strncmp_iw_with_mode): Use them. Add language parameter. Don't + skip whitespace in the symbol name when the lookup name doesn't + have spaces, and vice versa. + (strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode. + * utils.h (strncmp_iw_with_mode): Add language parameter. + 2017-11-24 Joel Brobecker * ada-lang.c (ada_exception_message_1, ada_exception_message): diff --git a/gdb/cp-support.c b/gdb/cp-support.c index 1cab69b..368112a 100644 --- a/gdb/cp-support.c +++ b/gdb/cp-support.c @@ -1617,6 +1617,39 @@ gdb_sniff_from_mangled_name (const char *mangled, char **demangled) /* C++ symbol_name_matcher_ftype implementation. */ +/* Helper for cp_fq_symbol_name_matches (i.e., + symbol_name_matcher_ftype implementation). Split to a separate + function for unit-testing convenience. + + See symbol_name_matcher_ftype for description of SYMBOL_SEARCH_NAME + and COMP_MATCH_RES. + + LOOKUP_NAME/LOOKUP_NAME_LEN is the name we're looking up. + + See strncmp_iw_with_mode for description of MODE. 
+*/ + +static bool +cp_symbol_name_matches_1 (const char *symbol_search_name, + const char *lookup_name, + size_t lookup_name_len, + strncmp_iw_mode mode, + completion_match *match) +{ + if (strncmp_iw_with_mode (symbol_search_name, + lookup_name, lookup_name_len, + mode, language_cplus) == 0) + { + if (match != NULL) + match->set_match (symbol_search_name); + return true; + } + + return false; +} + +/* C++ symbol_name_matcher_ftype implementation. */ + static bool cp_fq_symbol_name_matches (const char *symbol_search_name, const lookup_name_info &lookup_name, @@ -1629,16 +1662,9 @@ cp_fq_symbol_name_matches (const char *symbol_search_name, ? strncmp_iw_mode::NORMAL : strncmp_iw_mode::MATCH_PARAMS); - if (strncmp_iw_with_mode (symbol_search_name, - name.c_str (), name.size (), - mode) == 0) - { - if (match != NULL) - match->set_match (symbol_search_name); - return true; - } - - return false; + return cp_symbol_name_matches_1 (symbol_search_name, + name.c_str (), name.size (), + mode, match); } /* See cp-support.h. */ @@ -1653,6 +1679,136 @@ cp_get_symbol_name_matcher (const lookup_name_info &lookup_name) namespace selftests { +void +test_cp_symbol_name_matches () +{ +#define CHECK_MATCH(SYMBOL, INPUT) \ + SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL, \ + INPUT, sizeof (INPUT) - 1, \ + strncmp_iw_mode::MATCH_PARAMS, \ + NULL)) + +#define CHECK_NOT_MATCH(SYMBOL, INPUT) \ + SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL, \ + INPUT, sizeof (INPUT) - 1, \ + strncmp_iw_mode::MATCH_PARAMS, \ + NULL)) + + /* Like CHECK_MATCH, and also check that INPUT (and all substrings + that start at index 0) completes to SYMBOL. */ +#define CHECK_MATCH_C(SYMBOL, INPUT) \ + do \ + { \ + CHECK_MATCH (SYMBOL, INPUT); \ + for (size_t i = 0; i < sizeof (INPUT) - 1; i++) \ + SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL, INPUT, i, \ + strncmp_iw_mode::NORMAL, \ + NULL)); \ + } while (0) + + /* Like CHECK_NOT_MATCH, and also check that INPUT does NOT complete + to SYMBOL. 
*/ +#define CHECK_NOT_MATCH_C(SYMBOL, INPUT) \ + do \ + { \ + CHECK_NOT_MATCH (SYMBOL, INPUT); \ + SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL, INPUT, \ + sizeof (INPUT) - 1, \ + strncmp_iw_mode::NORMAL, \ + NULL)); \ + } while (0) + + /* Lookup name without parens matches all overloads. */ + CHECK_MATCH_C ("function()", "function"); + CHECK_MATCH_C ("function(int)", "function"); + + /* Check whitespace around parameters is ignored. */ + CHECK_MATCH_C ("function()", "function ()"); + CHECK_MATCH_C ("function ( )", "function()"); + CHECK_MATCH_C ("function ()", "function( )"); + CHECK_MATCH_C ("func(int)", "func( int )"); + CHECK_MATCH_C ("func(int)", "func ( int ) "); + CHECK_MATCH_C ("func ( int )", "func( int )"); + CHECK_MATCH_C ("func ( int )", "func ( int ) "); + + /* Check symbol name prefixes aren't incorrectly matched. */ + CHECK_NOT_MATCH ("func", "function"); + CHECK_NOT_MATCH ("function", "func"); + CHECK_NOT_MATCH ("function()", "func"); + + /* Check that if the lookup name includes parameters, only the right + overload matches. */ + CHECK_MATCH_C ("function(int)", "function(int)"); + CHECK_NOT_MATCH_C ("function(int)", "function()"); + + /* Check that whitespace within symbol names is not ignored. */ + CHECK_NOT_MATCH_C ("function", "func tion"); + CHECK_NOT_MATCH_C ("func__tion", "func_ _tion"); + CHECK_NOT_MATCH_C ("func11tion", "func1 1tion"); + + /* Check the converse, which can happen with template function, + where the return type is part of the demangled name. */ + CHECK_NOT_MATCH_C ("func tion", "function"); + CHECK_NOT_MATCH_C ("func1 1tion", "func11tion"); + CHECK_NOT_MATCH_C ("func_ _tion", "func__tion"); + + /* Within parameters too. */ + CHECK_NOT_MATCH_C ("func(param)", "func(par am)"); + + /* Check handling of whitespace around C++ operators. 
*/ + CHECK_NOT_MATCH_C ("operator<<", "opera tor<<"); + CHECK_NOT_MATCH_C ("operator<<", "operator< <"); + CHECK_NOT_MATCH_C ("operator<<", "operator < <"); + CHECK_NOT_MATCH_C ("operator==", "operator= ="); + CHECK_NOT_MATCH_C ("operator==", "operator = ="); + CHECK_MATCH_C ("operator<<", "operator <<"); + CHECK_MATCH_C ("operator<<()", "operator <<"); + CHECK_NOT_MATCH_C ("operator<<()", "operator<<(int)"); + CHECK_NOT_MATCH_C ("operator<<(int)", "operator<<()"); + CHECK_MATCH_C ("operator==", "operator =="); + CHECK_MATCH_C ("operator==()", "operator =="); + CHECK_MATCH_C ("operator <<", "operator<<"); + CHECK_MATCH_C ("operator ==", "operator=="); + CHECK_MATCH_C ("operator bool", "operator bool"); + CHECK_MATCH_C ("operator bool ()", "operator bool"); + CHECK_MATCH_C ("operatorX<<", "operatorX < <"); + CHECK_MATCH_C ("Xoperator<<", "Xoperator < <"); + + CHECK_MATCH_C ("operator()(int)", "operator()(int)"); + CHECK_MATCH_C ("operator()(int)", "operator ( ) ( int )"); + CHECK_MATCH_C ("operator()<long>(int)", "operator ( ) < long > ( int )"); + /* The first "()" is not the parameter list. */ + CHECK_NOT_MATCH ("operator()(int)", "operator"); + + /* Misc user-defined operator tests. */ + + CHECK_NOT_MATCH_C ("operator/=()", "operator ^="); + /* Same length at end of input. */ + CHECK_NOT_MATCH_C ("operator>>", "operator[]"); + /* Same length but not at end of input. 
*/ + CHECK_NOT_MATCH_C ("operator>>()", "operator[]()"); + + CHECK_MATCH_C ("base::operator char*()", "base::operator char*()"); + CHECK_MATCH_C ("base::operator char*()", "base::operator char * ()"); + CHECK_MATCH_C ("base::operator char**()", "base::operator char * * ()"); + CHECK_MATCH ("base::operator char**()", "base::operator char * *"); + CHECK_MATCH_C ("base::operator*()", "base::operator*()"); + CHECK_NOT_MATCH_C ("base::operator char*()", "base::operatorc"); + CHECK_NOT_MATCH ("base::operator char*()", "base::operator char"); + CHECK_NOT_MATCH ("base::operator char*()", "base::operat"); + + /* Check handling of whitespace around C++ scope operators. */ + CHECK_NOT_MATCH_C ("foo::bar", "foo: :bar"); + CHECK_MATCH_C ("foo::bar", "foo :: bar"); + CHECK_MATCH_C ("foo :: bar", "foo::bar"); + + CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi()"); + CHECK_MATCH_C ("abc::def::ghi ( )", "abc::def::ghi()"); + CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi ( )"); + CHECK_MATCH_C ("function()", "function()"); + CHECK_MATCH_C ("bar::function()", "bar::function()"); +} + /* If non-NULL, return STR wrapped in quotes. Otherwise, return a "" string (with no quotes). 
*/ @@ -1856,6 +2012,8 @@ display the offending symbol."), #endif #if GDB_SELF_TEST + selftests::register_test ("cp_symbol_name_matches", + selftests::test_cp_symbol_name_matches); selftests::register_test ("cp_remove_params", selftests::test_cp_remove_params); #endif diff --git a/gdb/language.c b/gdb/language.c index 76047c7..2a1419c 100644 --- a/gdb/language.c +++ b/gdb/language.c @@ -713,7 +713,7 @@ default_symbol_name_matcher (const char *symbol_search_name, : strncmp_iw_mode::MATCH_PARAMS); if (strncmp_iw_with_mode (symbol_search_name, name.c_str (), name.size (), - mode) == 0) + mode, language_minimal) == 0) { if (match != NULL) match->set_match (symbol_search_name); diff --git a/gdb/utils.c b/gdb/utils.c index b5c011b..3e817ed 100644 --- a/gdb/utils.c +++ b/gdb/utils.c @@ -68,6 +68,8 @@ #include "job-control.h" #include "common/selftest.h" #include "common/gdb_optional.h" +#include "cp-support.h" +#include <algorithm> #if !HAVE_DECL_MALLOC extern PTR malloc (); /* ARI: PTR */ @@ -2156,22 +2158,233 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name, } } +/* True if CH is a character that can be part of a symbol name. I.e., + either a number, a letter, or a '_'. */ + +static bool +valid_identifier_name_char (int ch) +{ + return (isalnum (ch) || ch == '_'); +} + +/* Skip to end of token, or to END, whatever comes first. Input is + assumed to be a C++ operator name. */ + +static const char * +cp_skip_operator_token (const char *token, const char *end) +{ + const char *p = token; + while (p != end && !isspace (*p) && *p != '(') + { + if (valid_identifier_name_char (*p)) + { + while (p != end && valid_identifier_name_char (*p)) + p++; + return p; + } + else + { + /* Note, ordered such that among ops that share a prefix, + longer comes first. This is so that the loop below can + bail on first match. 
*/ + static const char *ops[] = + { + "[", + "]", + "~", + ",", + "-=", "--", "->", "-", + "+=", "++", "+", + "*=", "*", + "/=", "/", + "%=", "%", + "|=", "||", "|", + "&=", "&&", "&", + "^=", "^", + "!=", "!", + "<<=", "<=", "<<", "<", + ">>=", ">=", ">>", ">", + "==", "=", + }; + + for (const char *op : ops) + { + size_t oplen = strlen (op); + size_t lencmp = std::min<size_t> (oplen, end - p); + + if (strncmp (p, op, lencmp) == 0) + return p + lencmp; + } + /* Some unidentified character. Return it. */ + return p + 1; + } + } + + return p; +} + +/* Advance STRING1/STRING2 past whitespace. */ + +static void +skip_ws (const char *&string1, const char *&string2, const char *end_str2) +{ + while (isspace (*string1)) + string1++; + while (string2 < end_str2 && isspace (*string2)) + string2++; +} + +/* True if STRING points at the start of a C++ operator name. START + is the start of the string that STRING points to, hence when + reading backwards, we must not read any character before START. */ + +static bool +cp_is_operator (const char *string, const char *start) +{ + return ((string == start + || !valid_identifier_name_char (string[-1])) + && strncmp (string, CP_OPERATOR_STR, CP_OPERATOR_LEN) == 0 + && !valid_identifier_name_char (string[CP_OPERATOR_LEN])); +} + /* See utils.h. 
*/ int strncmp_iw_with_mode (const char *string1, const char *string2, - size_t string2_len, strncmp_iw_mode mode) + size_t string2_len, strncmp_iw_mode mode, + enum language language) { + const char *string1_start = string1; const char *end_str2 = string2 + string2_len; + bool skip_spaces = true; + bool have_colon_op = (language == language_cplus + || language == language_rust + || language == language_fortran); while (1) { - while (isspace (*string1)) - string1++; - while (string2 < end_str2 && isspace (*string2)) - string2++; + if (skip_spaces + || ((isspace (*string1) && !valid_identifier_name_char (*string2)) + || (isspace (*string2) && !valid_identifier_name_char (*string1)))) + { + skip_ws (string1, string2, end_str2); + skip_spaces = false; + } + if (*string1 == '\0' || string2 == end_str2) break; + + /* Handle the :: operator. */ + if (have_colon_op && string1[0] == ':' && string1[1] == ':') + { + if (*string2 != ':') + return 1; + + string1++; + string2++; + + if (string2 == end_str2) + break; + + if (*string2 != ':') + return 1; + + string1++; + string2++; + + while (isspace (*string1)) + string1++; + while (string2 < end_str2 && isspace (*string2)) + string2++; + continue; + } + + /* Handle C++ user-defined operators. */ + else if (language == language_cplus + && *string1 == 'o') + { + if (cp_is_operator (string1, string1_start)) + { + /* An operator name in STRING1. Check STRING2. */ + size_t cmplen + = std::min<size_t> (CP_OPERATOR_LEN, end_str2 - string2); + if (strncmp (string1, string2, cmplen) != 0) + return 1; + + string1 += cmplen; + string2 += cmplen; + + if (string2 != end_str2) + { + /* Check for "operatorX" in STRING2. */ + if (valid_identifier_name_char (*string2)) + return 1; + + skip_ws (string1, string2, end_str2); + } + + /* Handle operator(). 
*/ + if (*string1 == '(') + { + if (string2 == end_str2) + { + if (mode == strncmp_iw_mode::NORMAL) + return 0; + else + { + /* Don't break for the regular return at the + bottom, because "operator" should not + match "operator()", since this open + parentheses is not the parameter list + start. */ + return *string1 != '\0'; + } + } + + if (*string1 != *string2) + return 1; + + string1++; + string2++; + } + + while (1) + { + skip_ws (string1, string2, end_str2); + + /* Skip to end of token, or to END, whatever comes + first. */ + const char *end_str1 = string1 + strlen (string1); + const char *p1 = cp_skip_operator_token (string1, end_str1); + const char *p2 = cp_skip_operator_token (string2, end_str2); + + cmplen = std::min (p1 - string1, p2 - string2); + if (p2 == end_str2) + { + if (strncmp (string1, string2, cmplen) != 0) + return 1; + } + else + { + if (p1 - string1 != p2 - string2) + return 1; + if (strncmp (string1, string2, cmplen) != 0) + return 1; + } + + string1 += cmplen; + string2 += cmplen; + + if (*string1 == '\0' || string2 == end_str2) + break; + if (*string1 == '(' || *string2 == '(') + break; + } + + continue; + } + } + if (case_sensitivity == case_sensitive_on && *string1 != *string2) break; if (case_sensitivity == case_sensitive_off @@ -2179,6 +2392,12 @@ strncmp_iw_with_mode (const char *string1, const char *string2, != tolower ((unsigned char) *string2))) break; + /* If we see any non-whitespace, non-identifier-name character + (any of "()<>*&" etc.), then skip spaces the next time + around. */ + if (!isspace (*string1) && !valid_identifier_name_char (*string1)) + skip_spaces = true; + string1++; string2++; } @@ -2200,7 +2419,7 @@ int strncmp_iw (const char *string1, const char *string2, size_t string2_len) { return strncmp_iw_with_mode (string1, string2, string2_len, - strncmp_iw_mode::NORMAL); + strncmp_iw_mode::NORMAL, language_minimal); } /* See utils.h. 
*/ @@ -2209,7 +2428,7 @@ int strcmp_iw (const char *string1, const char *string2) { return strncmp_iw_with_mode (string1, string2, strlen (string2), - strncmp_iw_mode::MATCH_PARAMS); + strncmp_iw_mode::MATCH_PARAMS, language_minimal); } /* This is like strcmp except that it ignores whitespace and treats diff --git a/gdb/utils.h b/gdb/utils.h index e2fa430..dff4b17 100644 --- a/gdb/utils.h +++ b/gdb/utils.h @@ -48,17 +48,24 @@ enum class strncmp_iw_mode /* Helper for strcmp_iw and strncmp_iw. Exported so that languages can implement both NORMAL and MATCH_PARAMS variants in a single - function and defer part of the work to strncmp_iw_with_mode. */ + function and defer part of the work to strncmp_iw_with_mode. + LANGUAGE is used to implement some context-sensitive + language-specific comparisons. For example, for C++, + "string1=operator()" should not match "string2=operator" even in + MATCH_PARAMS mode. */ extern int strncmp_iw_with_mode (const char *string1, const char *string2, size_t string2_len, - strncmp_iw_mode mode); + strncmp_iw_mode mode, + enum language language); /* Do a strncmp() type operation on STRING1 and STRING2, ignoring any differences in whitespace. STRING2_LEN is STRING2's length. Returns 0 if STRING1 matches STRING2_LEN characters of STRING2, non-zero otherwise (slightly different than strncmp()'s range of - return values). */ + return values). Note: passes language_minimal to + strncmp_iw_with_mode, and should therefore be avoided if a more + suitable language is available. */ extern int strncmp_iw (const char *string1, const char *string2, size_t string2_len); @@ -70,7 +77,10 @@ extern int strncmp_iw (const char *string1, const char *string2, As an extra hack, string1=="FOO(ARGS)" matches string2=="FOO". This "feature" is useful when searching for matching C++ function names (such as if the user types 'break FOO', where FOO is a - mangled C++ function). */ + mangled C++ function). 
+ + Note: passes language_minimal to strncmp_iw_with_mode, and should + therefore be avoided if a more suitable language is available. */ extern int strcmp_iw (const char *string1, const char *string2); extern int strcmp_iw_ordered (const char *, const char *); -- 2.7.4