From: Haojian Wu Date: Tue, 1 Oct 2019 11:03:56 +0000 (+0000) Subject: [clangd] Implement getBeginning for overloaded operators. X-Git-Tag: llvmorg-11-init~7903 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9f2bf666bc0ec59be6684ef8b50cab22a4fad50e;p=platform%2Fupstream%2Fllvm.git [clangd] Implement getBeginning for overloaded operators. Summary: This will fix some bugs where navigation doesn't work on cases like `std::cout <^< "hello"`. Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D67695 llvm-svn: 373323 --- diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index ee0b74a..05ca7aa 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -237,6 +237,45 @@ llvm::Optional<Range> getTokenRange(const SourceManager &SM, return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End)); } +namespace { + +enum TokenFlavor { Identifier, Operator, Whitespace, Other }; + +bool isOverloadedOperator(const Token &Tok) { + switch (Tok.getKind()) { +#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly) \ + case tok::Token: +#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly) +#include "clang/Basic/OperatorKinds.def" + return true; + + default: + break; + } + return false; +} + +TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + Token Tok; + Tok.setKind(tok::NUM_TOKENS); + if (Lexer::getRawToken(Loc, Tok, SM, LangOpts, + /*IgnoreWhiteSpace*/ false)) + return Other; + + // getRawToken will return false without setting Tok when the token is + // whitespace, so if the flag is not set, we are sure this is a whitespace. 
+ if (Tok.is(tok::TokenKind::NUM_TOKENS)) + return Whitespace; + if (Tok.is(tok::TokenKind::raw_identifier)) + return Identifier; + if (isOverloadedOperator(Tok)) + return Operator; + return Other; +} + +} // namespace + SourceLocation getBeginningOfIdentifier(const Position &Pos, const SourceManager &SM, const LangOptions &LangOpts) { @@ -247,27 +286,57 @@ SourceLocation getBeginningOfIdentifier(const Position &Pos, return SourceLocation(); } - // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing - // if the cursor is at the end of the identifier. - // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are: - // 1) at the beginning of an identifier, we'll be looking at something - // that isn't an identifier. - // 2) at the middle or end of an identifier, we get the identifier. - // 3) anywhere outside an identifier, we'll get some non-identifier thing. - // We can't actually distinguish cases 1 and 3, but returning the original - // location is correct for both! + // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong + // thing if the cursor is at the end of the token (identifier or operator). + // The cases are: + // 1) at the beginning of the token + // 2) at the middle of the token + // 3) at the end of the token + // 4) anywhere outside the identifier or operator + // To distinguish all cases, we lex both at the + // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc), for + // cases 1 and 4, we just return the original location. SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset); - if (*Offset == 0) // Case 1 or 3. + if (*Offset == 0) // Case 1 or 4. 
return InputLoc; SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1); + SourceLocation BeforeTokBeginning = + Lexer::GetBeginningOfToken(Before, SM, LangOpts); + TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts); + + SourceLocation CurrentTokBeginning = + Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts); + TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts); + + // At the middle of the token. + if (BeforeTokBeginning == CurrentTokBeginning) { + // For interesting token, we return the beginning of the token. + if (CurrentKind == Identifier || CurrentKind == Operator) + return CurrentTokBeginning; + // otherwise, we return the original loc. + return InputLoc; + } - Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts); - Token Tok; - if (Before.isValid() && - !Lexer::getRawToken(Before, Tok, SM, LangOpts, false) && - Tok.is(tok::raw_identifier)) - return Before; // Case 2. - return InputLoc; // Case 1 or 3. + // Whitespace is not interesting. + if (BeforeKind == Whitespace) + return CurrentTokBeginning; + if (CurrentKind == Whitespace) + return BeforeTokBeginning; + + // The cursor is at the token boundary, e.g. "Before^Current", we prefer + // identifiers to other tokens. + if (CurrentKind == Identifier) + return CurrentTokBeginning; + if (BeforeKind == Identifier) + return BeforeTokBeginning; + // Then prefer overloaded operators to other tokens. + if (CurrentKind == Operator) + return CurrentTokBeginning; + if (BeforeKind == Operator) + return BeforeTokBeginning; + + // Non-interesting case, we just return the original location. 
+ return InputLoc; } bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index 53a140f..3017746 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -79,7 +79,7 @@ llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, Position P); /// Get the beginning SourceLocation at a specified \p Pos in the main file. -/// May be invalid if Pos is, or if there's no identifier. +/// May be invalid if Pos is, or if there's no identifier or operators. /// The returned position is in the main file, callers may prefer to /// obtain the macro expansion location. SourceLocation getBeginningOfIdentifier(const Position &Pos, diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp index 771396d..e49f64c 100644 --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -319,14 +319,29 @@ struct Bar { int func(); }; Bar* bar; )cpp"; // First ^ is the expected beginning, last is the search position. 
- for (std::string Text : std::vector<std::string>{ + for (const std::string &Text : std::vector<std::string>{ "int ^f^oo();", // inside identifier "int ^foo();", // beginning of identifier "int ^foo^();", // end of identifier "int foo(^);", // non-identifier "^int foo();", // beginning of file (can't back up) "int ^f0^0();", // after a digit (lexing at N-1 is wrong) - "int ^λλ^λ();", // UTF-8 handled properly when backing up + "/^/ comments", // non-interesting token + "void f(int abc) { abc ^ ++; }", // whitespace + "void f(int abc) { ^abc^++; }", // range of identifier + "void f(int abc) { ++^abc^; }", // range of identifier + "void f(int abc) { ++^abc; }", // range of identifier + "void f(int abc) { ^+^+abc; }", // range of operator + "void f(int abc) { ^abc^ ++; }", // range of identifier + "void f(int abc) { abc ^++^; }", // range of operator + "void f(int abc) { ^++^ abc; }", // range of operator + "void f(int abc) { ++ ^abc^; }", // range of identifier + "void f(int abc) { ^++^/**/abc; }", // range of operator + "void f(int abc) { ++/**/^abc; }", // range of identifier + "void f(int abc) { ^abc^/**/++; }", // range of identifier + "void f(int abc) { abc/**/^++; }", // range of operator + "void f() {^ }", // outside of identifier and operator + "int ^λλ^λ();", // UTF-8 handled properly when backing up // identifier in macro arg "MACRO(bar->^func())", // beginning of identifier diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index d42bed4..60cba82 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -441,6 +441,15 @@ TEST(LocateSymbol, All) { auto x = m^akeX(); } )cpp", + + R"cpp( + struct X { + X& [[operator]]++() {} + }; + void foo(X& x) { + +^+x; + } + )cpp", }; for (const char *Test : Tests) { Annotations T(Test);