From b2b993a6ae675955b1112473c035ae6b4e3932a0 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 22 Jul 2022 11:28:51 +0200 Subject: [PATCH] [pseudo] Eliminate multiple-specified-types ambiguities using guards Motivating case: `foo bar;` is not a declaration of nothing with `foo` and `bar` both types. This is a common and critical ambiguity, clangd/AST.cpp has 20% fewer ambiguous nodes (1674->1332) after this change. Differential Revision: https://reviews.llvm.org/D130337 --- clang-tools-extra/pseudo/lib/cxx/CXX.cpp | 121 ++++++++++++++++++++- clang-tools-extra/pseudo/lib/cxx/cxx.bnf | 27 ++--- .../pseudo/test/cxx/decl-specfier-seq.cpp | 27 +++++ clang-tools-extra/pseudo/test/fuzzer.cpp | 2 +- 4 files changed, 159 insertions(+), 18 deletions(-) create mode 100644 clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp index 7fc3a48..73af0a7a 100644 --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -14,7 +14,9 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" #include +#define DEBUG_TYPE "CXX.cpp" namespace clang { namespace pseudo { @@ -160,7 +162,106 @@ bool guardNextTokenNotElse(const GuardParams &P) { return symbolToToken(P.Lookahead) != tok::kw_else; } +// Whether this e.g. decl-specifier contains an "exclusive" type such as a class +// name, and thus can't combine with a second exclusive type. +// +// Returns false for +// - non-types +// - "unsigned" etc that may suffice as types but may modify others +// - cases of uncertainty (e.g. due to ambiguity) +bool hasExclusiveType(const ForestNode *N) { + // FIXME: every time we apply this check, we walk the whole subtree. + // Add per-node caching instead. + while (true) { + assert(N->symbol() == (SymbolID)Symbol::decl_specifier_seq || + N->symbol() == (SymbolID)Symbol::type_specifier_seq || + N->symbol() == (SymbolID)Symbol::defining_type_specifier_seq || + N->symbol() == (SymbolID)Symbol::decl_specifier || + N->symbol() == (SymbolID)Symbol::type_specifier || + N->symbol() == (SymbolID)Symbol::defining_type_specifier || + N->symbol() == (SymbolID)Symbol::simple_type_specifier); + if (N->kind() == ForestNode::Opaque) + return false; // conservative + if (N->kind() == ForestNode::Ambiguous) + return llvm::all_of(N->alternatives(), hasExclusiveType); // conservative + // All supported symbols are nonterminals. + assert(N->kind() == ForestNode::Sequence); + switch (N->rule()) { + // seq := element seq: check element then continue into seq + case (RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq: + case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq: + case (RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq: + if (hasExclusiveType(N->children()[0])) + return true; + N = N->children()[1]; + continue; + // seq := element: continue into element + case (RuleID)Rule::decl_specifier_seq_0decl_specifier: + case (RuleID)Rule::type_specifier_seq_0type_specifier: + case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier: + N = N->children()[0]; + continue; + + // defining-type-specifier + case (RuleID)Rule::defining_type_specifier_0type_specifier: + N = N->children()[0]; + continue; + case (RuleID)Rule::defining_type_specifier_0class_specifier: + case (RuleID)Rule::defining_type_specifier_0enum_specifier: + return true; + + // decl-specifier + case (RuleID)Rule::decl_specifier_0defining_type_specifier: + N = N->children()[0]; + continue; + case (RuleID)Rule::decl_specifier_0consteval: + case (RuleID)Rule::decl_specifier_0constexpr: + case (RuleID)Rule::decl_specifier_0constinit: + case (RuleID)Rule::decl_specifier_0inline: + case (RuleID)Rule::decl_specifier_0friend: + case (RuleID)Rule::decl_specifier_0storage_class_specifier: + case (RuleID)Rule::decl_specifier_0typedef: + case (RuleID)Rule::decl_specifier_0function_specifier: + return false; + + // type-specifier + case (RuleID)Rule::type_specifier_0elaborated_type_specifier: + case (RuleID)Rule::type_specifier_0typename_specifier: + return true; + case (RuleID)Rule::type_specifier_0simple_type_specifier: + N = N->children()[0]; + continue; + case (RuleID)Rule::type_specifier_0cv_qualifier: + return false; + + // simple-type-specifier + case (RuleID)Rule::simple_type_specifier_0type_name: + case (RuleID)Rule::simple_type_specifier_0template_name: + case (RuleID)Rule::simple_type_specifier_0builtin_type: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_2simple_template_id: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_name: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1type_name: + case (RuleID)Rule::simple_type_specifier_0decltype_specifier: + case (RuleID)Rule::simple_type_specifier_0placeholder_type_specifier: + return true; + case (RuleID)Rule::simple_type_specifier_0long: + case (RuleID)Rule::simple_type_specifier_0short: + case (RuleID)Rule::simple_type_specifier_0signed: + case (RuleID)Rule::simple_type_specifier_0unsigned: + return false; + + default: + LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n"); + llvm_unreachable("hasExclusiveType be exhaustive!"); + } + } +} + llvm::DenseMap buildGuards() { +#define GUARD(cond) \ + { \ + [](const GuardParams &P) { return cond; } \ + } #define TOKEN_GUARD(kind, cond) \ [](const GuardParams& P) { \ const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \ @@ -177,6 +278,16 @@ llvm::DenseMap buildGuards() { {(RuleID)Rule::non_function_declarator_0declarator, SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))}, + // A {decl,type,defining-type}-specifier-sequence cannot have multiple + // "exclusive" types (like class names): a value has only one type. + {(RuleID)Rule:: + defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq, + GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))}, + {(RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq, + GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))}, + {(RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq, + GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))}, + {(RuleID)Rule::contextual_override_0identifier, TOKEN_GUARD(identifier, Tok.text() == "override")}, {(RuleID)Rule::contextual_final_0identifier, @@ -190,10 +301,12 @@ llvm::DenseMap buildGuards() { {(RuleID)Rule::contextual_zero_0numeric_constant, TOKEN_GUARD(numeric_constant, Tok.text() == "0")}, - {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement, - guardNextTokenNotElse}, - {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement, - guardNextTokenNotElse}, + {(RuleID)Rule:: + selection_statement_0if_1l_paren_2condition_3r_paren_4statement, + guardNextTokenNotElse}, + {(RuleID)Rule:: + selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement, + guardNextTokenNotElse}, // The grammar distinguishes (only) user-defined vs plain string literals, // where the clang lexer distinguishes (only) encoding types. diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf index ca8fc01..8dfab8b 100644 --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -350,7 +350,7 @@ decl-specifier := CONSTEVAL decl-specifier := CONSTINIT decl-specifier := INLINE decl-specifier-seq := decl-specifier -decl-specifier-seq := decl-specifier decl-specifier-seq +decl-specifier-seq := decl-specifier decl-specifier-seq [guard] storage-class-specifier := STATIC storage-class-specifier := THREAD_LOCAL storage-class-specifier := EXTERN @@ -364,31 +364,32 @@ type-specifier := elaborated-type-specifier type-specifier := typename-specifier type-specifier := cv-qualifier type-specifier-seq := type-specifier -type-specifier-seq := type-specifier type-specifier-seq +type-specifier-seq := type-specifier type-specifier-seq [guard] defining-type-specifier := type-specifier defining-type-specifier := class-specifier defining-type-specifier := enum-specifier defining-type-specifier-seq := defining-type-specifier -defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq +defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq [guard] simple-type-specifier := nested-name-specifier_opt type-name simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id simple-type-specifier := decltype-specifier simple-type-specifier := placeholder-type-specifier simple-type-specifier := nested-name-specifier_opt template-name -simple-type-specifier := CHAR -simple-type-specifier := CHAR8_T -simple-type-specifier := CHAR16_T -simple-type-specifier := CHAR32_T -simple-type-specifier := WCHAR_T -simple-type-specifier := BOOL +simple-type-specifier := builtin-type +builtin-type := CHAR +builtin-type := CHAR8_T +builtin-type := CHAR16_T +builtin-type := CHAR32_T +builtin-type := WCHAR_T +builtin-type := BOOL simple-type-specifier := SHORT -simple-type-specifier := INT +builtin-type := INT simple-type-specifier := LONG simple-type-specifier := SIGNED simple-type-specifier := UNSIGNED -simple-type-specifier := FLOAT -simple-type-specifier := DOUBLE -simple-type-specifier := VOID +builtin-type := FLOAT +builtin-type := DOUBLE +builtin-type := VOID type-name := class-name type-name := enum-name type-name := typedef-name diff --git a/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp new file mode 100644 index 0000000..255e8be --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp @@ -0,0 +1,27 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s + +// not parsed as Type{foo} Type{bar} +foo bar; +// CHECK-NOT: simple-declaration := decl-specifier-seq ; +// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ; +// CHECK: ├─decl-specifier-seq~simple-type-specifier +// CHECK: ├─init-declarator-list~IDENTIFIER +// CHECK: └─; +// CHECK-NOT: simple-declaration := decl-specifier-seq ; + +// not parsed as Type{std} Type{::string} Declarator{s}; +std::string s; +// CHECK-NOT: nested-name-specifier := :: +// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ; +// CHECK: ├─decl-specifier-seq~simple-type-specifier := +// CHECK: │ ├─simple-type-specifier := nested-name-specifier type-name +// CHECK: │ │ ├─nested-name-specifier := #1 +// CHECK: │ │ │ ├─nested-name-specifier := type-name :: +// CHECK: │ │ │ └─nested-name-specifier := namespace-name :: +// CHECK: │ │ └─type-name +// CHECK: │ └─simple-type-specifier := nested-name-specifier template-name +// CHECK: │ ├─nested-name-specifier =#1 +// CHECK: │ └─template-name~IDENTIFIER +// CHECK: ├─init-declarator-list~IDENTIFIER +// CHECK: └─; +// CHECK-NOT: nested-name-specifier := :: diff --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp index 6f4d093..400746a 100644 --- a/clang-tools-extra/pseudo/test/fuzzer.cpp +++ b/clang-tools-extra/pseudo/test/fuzzer.cpp @@ -1,4 +1,4 @@ // RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s int x; // CHECK: translation-unit := declaration-seq -// CHECK: simple-type-specifier := INT +// CHECK: builtin-type := INT -- 2.7.4