return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
}
+namespace {
+
+/// Coarse classification of a raw token, as produced by getTokenFlavor().
+enum TokenFlavor {
+  Identifier, ///< The token is a raw identifier.
+  Operator,   ///< The token kind is matched by isOverloadedOperator().
+  Whitespace, ///< The lexer left the token unset, i.e. the location is whitespace.
+  Other       ///< Anything else, including locations that fail to lex.
+};
+
+/// Returns true if the kind of \p Tok is one of the token kinds named by an
+/// OVERLOADED_OPERATOR entry in clang/Basic/OperatorKinds.def.
+///
+/// OVERLOADED_OPERATOR_MULTI entries are expanded to nothing here, so they
+/// contribute no case label and are never matched.
+bool isOverloadedOperator(const Token &Tok) {
+  switch (Tok.getKind()) {
+// Each single-token entry of the .def file becomes one `case` label; all of
+// the generated labels fall through to the shared `return true` below.
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly)     \
+  case tok::Token:
+#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly)
+#include "clang/Basic/OperatorKinds.def"
+    return true;
+
+  default:
+    // Not one of the generated labels.
+    return false;
+  }
+}
+
+/// Lexes the raw token at \p Loc and classifies it as a TokenFlavor.
+///
+/// \returns Other if the raw lex reports failure; Whitespace if the lexer
+/// left the token untouched; Identifier for a raw identifier; Operator if
+/// isOverloadedOperator() accepts the token; Other in every remaining case.
+TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  // Seed the kind with a sentinel so that a token the lexer never wrote to
+  // can be recognised afterwards.
+  Token RawTok;
+  RawTok.setKind(tok::NUM_TOKENS);
+
+  const bool LexFailed = Lexer::getRawToken(Loc, RawTok, SM, LangOpts,
+                                            /*IgnoreWhiteSpace*/ false);
+  if (LexFailed)
+    return Other;
+
+  switch (RawTok.getKind()) {
+  case tok::NUM_TOKENS:
+    // This relies on getRawToken returning false without setting the token
+    // when the location is whitespace: the sentinel surviving means
+    // whitespace.
+    return Whitespace;
+  case tok::raw_identifier:
+    return Identifier;
+  default:
+    return isOverloadedOperator(RawTok) ? Operator : Other;
+  }
+}
+
+} // namespace
+
SourceLocation getBeginningOfIdentifier(const Position &Pos,
const SourceManager &SM,
const LangOptions &LangOpts) {
return SourceLocation();
}
- // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
- // if the cursor is at the end of the identifier.
- // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
- // 1) at the beginning of an identifier, we'll be looking at something
- // that isn't an identifier.
- // 2) at the middle or end of an identifier, we get the identifier.
- // 3) anywhere outside an identifier, we'll get some non-identifier thing.
- // We can't actually distinguish cases 1 and 3, but returning the original
- // location is correct for both!
+ // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
+ // thing if the cursor is at the end of the token (identifier or operator).
+ // The cases are:
+ // 1) at the beginning of the token
+ // 2) in the middle of the token
+ // 3) at the end of the token
+ // 4) anywhere outside an identifier or operator
+ // To distinguish all cases, we lex at both
+ // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc). For
+ // cases 1 and 4, we just return the original location.
SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset);
- if (*Offset == 0) // Case 1 or 3.
+ if (*Offset == 0) // Case 1 or 4.
return InputLoc;
SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1);
+ SourceLocation BeforeTokBeginning =
+ Lexer::GetBeginningOfToken(Before, SM, LangOpts);
+ TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts);
+
+ SourceLocation CurrentTokBeginning =
+ Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts);
+ TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts);
+
+ // In the middle of the token.
+ if (BeforeTokBeginning == CurrentTokBeginning) {
+ // For an interesting token (identifier or operator), we return its beginning.
+ if (CurrentKind == Identifier || CurrentKind == Operator)
+ return CurrentTokBeginning;
+ // otherwise, we return the original loc.
+ return InputLoc;
+ }
- Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts);
- Token Tok;
- if (Before.isValid() &&
- !Lexer::getRawToken(Before, Tok, SM, LangOpts, false) &&
- Tok.is(tok::raw_identifier))
- return Before; // Case 2.
- return InputLoc; // Case 1 or 3.
+ // Whitespace is not interesting.
+ if (BeforeKind == Whitespace)
+ return CurrentTokBeginning;
+ if (CurrentKind == Whitespace)
+ return BeforeTokBeginning;
+
+ // The cursor is at the token boundary, e.g. "Before^Current", we prefer
+ // identifiers to other tokens.
+ if (CurrentKind == Identifier)
+ return CurrentTokBeginning;
+ if (BeforeKind == Identifier)
+ return BeforeTokBeginning;
+ // Then prefer overloaded operators to other tokens.
+ if (CurrentKind == Operator)
+ return CurrentTokBeginning;
+ if (BeforeKind == Operator)
+ return BeforeTokBeginning;
+
+ // Non-interesting case, we just return the original location.
+ return InputLoc;
}
bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
Bar* bar;
)cpp";
// First ^ is the expected beginning, last is the search position.
- for (std::string Text : std::vector<std::string>{
+ for (const std::string &Text : std::vector<std::string>{
"int ^f^oo();", // inside identifier
"int ^foo();", // beginning of identifier
"int ^foo^();", // end of identifier
"int foo(^);", // non-identifier
"^int foo();", // beginning of file (can't back up)
"int ^f0^0();", // after a digit (lexing at N-1 is wrong)
- "int ^λλ^λ();", // UTF-8 handled properly when backing up
+ "/^/ comments", // non-interesting token
+ "void f(int abc) { abc ^ ++; }", // whitespace
+ "void f(int abc) { ^abc^++; }", // range of identifier
+ "void f(int abc) { ++^abc^; }", // range of identifier
+ "void f(int abc) { ++^abc; }", // range of identifier
+ "void f(int abc) { ^+^+abc; }", // range of operator
+ "void f(int abc) { ^abc^ ++; }", // range of identifier
+ "void f(int abc) { abc ^++^; }", // range of operator
+ "void f(int abc) { ^++^ abc; }", // range of operator
+ "void f(int abc) { ++ ^abc^; }", // range of identifier
+ "void f(int abc) { ^++^/**/abc; }", // range of operator
+ "void f(int abc) { ++/**/^abc; }", // range of identifier
+ "void f(int abc) { ^abc^/**/++; }", // range of identifier
+ "void f(int abc) { abc/**/^++; }", // range of operator
+ "void f() {^ }", // outside of identifier and operator
+ "int ^λλ^λ();", // UTF-8 handled properly when backing up
// identifier in macro arg
"MACRO(bar->^func())", // beginning of identifier