/// whether trigraphs are enabled or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) {
// Look up the character for the letter at *CP. A zero result is returned
// unchanged (presumably meaning the letter does not form a trigraph --
// confirm against GetTrigraphCharForLetter).
char Res = GetTrigraphCharForLetter(*CP);
// The added lines below no longer return early for a null lexer; instead L
// is checked right before each diagnostic is emitted.
- if (!Res || !L) return Res;
+ if (!Res)
+ return Res;
// Trigraphs disabled: the result is always 0. trigraph_ignored is reported at
// CP-2 only when a lexer is supplied and it is not lexing in raw mode.
if (!Trigraphs) {
- if (!L->isLexingRawMode())
+ if (L && !L->isLexingRawMode())
L->Diag(CP-2, diag::trigraph_ignored);
return 0;
}
// Trigraphs enabled: report the conversion under the same lexer/raw-mode
// guard, streaming the decoded character into the diagnostic, then return it.
- if (!L->isLexingRawMode())
+ if (L && !L->isLexingRawMode())
L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
return Res;
}
if (!Delimited)
break;
if (Diagnose)
- Diag(BufferPtr, diag::warn_delimited_ucn_incomplete)
+ Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
<< StringRef(KindLoc, 1);
return std::nullopt;
}
if (Count == 0) {
if (Diagnose)
- Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
+ Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
: diag::warn_ucn_escape_no_digits)
<< StringRef(KindLoc, 1);
return std::nullopt;
if (Delimited && Kind == 'U') {
if (Diagnose)
- Diag(StartPtr, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
+ Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
return std::nullopt;
}
if (!Delimited && Count != NumHexDigits) {
if (Diagnose) {
- Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+ Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
// If the user wrote \U1234, suggest a fixit to \u.
if (Count == 4 && NumHexDigits == 8) {
CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
}
if (Delimited && PP) {
- Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b
- ? diag::warn_cxx2b_delimited_escape_sequence
- : diag::ext_delimited_escape_sequence)
+ Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b
+ ? diag::warn_cxx2b_delimited_escape_sequence
+ : diag::ext_delimited_escape_sequence)
<< /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0);
}
if (Result) {
Result->setFlag(Token::HasUCN);
- if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0)))
+ // If the UCN contains either a trigraph or a line splicing,
+ // we need to call getAndAdvanceChar again to set the appropriate flags
+ // on Result.
+ if (CurPtr - StartPtr == (ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
StartPtr = CurPtr;
else
while (StartPtr != CurPtr)
}
llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,
+ const char *SlashLoc,
Token *Result) {
unsigned CharSize;
// Diagnostics are emitted only when a Result token was supplied and the lexer
// is not in raw mode.
bool Diagnose = Result && !isLexingRawMode();
C = getCharAndSize(CurPtr, CharSize);
// Anything other than '{' at this point is rejected with std::nullopt; the
// added line reports the warning at SlashLoc instead of StartPtr.
if (C != '{') {
if (Diagnose)
- Diag(StartPtr, diag::warn_ucn_escape_incomplete);
+ Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
return std::nullopt;
}
CurPtr += CharSize;
break;
}
// In the visible part of the loop, the added check stops on vertical
// whitespace only; any other character reaching this point is appended to
// Buffer (the removed check stopped on every character outside
// alphanumerics, '_', '-' and ' ').
- if (!isAlphanumeric(C) && C != '_' && C != '-' && C != ' ')
+ if (isVerticalWhitespace(C))
break;
Buffer.push_back(C);
}
// Reject when FoundEndDelimiter is false or Buffer is empty, choosing the
// "empty" or "incomplete" warning accordingly.
if (!FoundEndDelimiter || Buffer.empty()) {
if (Diagnose)
- Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
+ Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
: diag::warn_delimited_ucn_incomplete)
<< StringRef(KindLoc, 1);
return std::nullopt;
}
// Strict name lookup first; if it fails, the added lines always compute a
// loose match but emit the error/note only when Diagnose is set.
StringRef Name(Buffer.data(), Buffer.size());
- llvm::Optional<char32_t> Res =
+ llvm::Optional<char32_t> Match =
llvm::sys::unicode::nameToCodepointStrict(Name);
llvm::Optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
- if (!Res) {
- if (!isLexingRawMode()) {
- Diag(StartPtr, diag::err_invalid_ucn_name)
- << StringRef(Buffer.data(), Buffer.size());
- LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
+ if (!Match) {
+ LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
+ if (Diagnose) {
+ Diag(StartName, diag::err_invalid_ucn_name)
+ << StringRef(Buffer.data(), Buffer.size())
+ << makeCharRange(*this, StartName, CurPtr - CharSize);
if (LooseMatch) {
Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
<< FixItHint::CreateReplacement(
LooseMatch->Name);
}
}
- // When finding a match using Unicode loose matching rules
- // recover after having emitted a diagnostic.
- if (!LooseMatch)
- return std::nullopt;
// We do not offer misspelled character names suggestions here
// as the set of what would be a valid suggestion depends on context,
// and we should not make invalid suggestions.
}
// NOTE(review): the added condition dereferences PP without the `PP` null
// check that the removed condition had -- presumably Diagnose being true
// implies a preprocessor is attached; confirm.
- if (Diagnose && PP && !LooseMatch)
- Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b
- ? diag::warn_cxx2b_delimited_escape_sequence
- : diag::ext_delimited_escape_sequence)
+ if (Diagnose && Match)
+ Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b
+ ? diag::warn_cxx2b_delimited_escape_sequence
+ : diag::ext_delimited_escape_sequence)
<< /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0);
- if (LooseMatch)
- Res = LooseMatch->CodePoint;
+ // If no diagnostic has been emitted yet, likely because we are doing a
+ // tentative lexing, we do not want to recover here to make sure the token
+ // will not be incorrectly considered valid. This function will be called
+ // again and a diagnostic emitted then.
+ if (LooseMatch && Diagnose)
+ Match = LooseMatch->CodePoint;
// With a Result token, the HasUCN flag is set unconditionally, including
// when Match is empty and std::nullopt is returned below.
if (Result) {
Result->setFlag(Token::HasUCN);
- if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 4))
+ // If the UCN contains either a trigraph or a line splicing,
+ // we need to call getAndAdvanceChar again to set the appropriate flags
+ // on Result.
+ if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 3))
StartPtr = CurPtr;
else
while (StartPtr != CurPtr)
} else {
// No Result token: StartPtr is simply set to CurPtr.
StartPtr = CurPtr;
}
// The added return yields std::nullopt when Match is still empty, i.e. no
// strict match and no loose match adopted above.
- return *Res;
+ return Match ? llvm::Optional<uint32_t>(*Match) : std::nullopt;
}
uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
if (Kind == 'u' || Kind == 'U')
CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc, Result);
else if (Kind == 'N')
- CodePointOpt = tryReadNamedUCN(StartPtr, Result);
+ CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc, Result);
if (!CodePointOpt)
return 0;
-// RUN: %clang_cc1 %s -fsyntax-only -std=c99 -pedantic -verify=expected,ext -Wundef
-// RUN: %clang_cc1 %s -fsyntax-only -x c++ -pedantic -verify=expected,ext -Wundef
-// RUN: %clang_cc1 %s -fsyntax-only -x c++ -std=c++2b -pedantic -ftrigraphs -verify=expected,cxx2b -Wundef -Wpre-c++2b-compat
+// RUN: %clang_cc1 %s -fsyntax-only -std=c99 -pedantic -verify=expected,ext -Wundef -DTRIGRAPHS=1
+// RUN: %clang_cc1 %s -fsyntax-only -x c++ -pedantic -verify=expected,ext -Wundef -fno-trigraphs
+// RUN: %clang_cc1 %s -fsyntax-only -x c++ -std=c++2b -pedantic -ftrigraphs -DTRIGRAPHS=1 -verify=expected,cxx2b -Wundef -Wpre-c++2b-compat
// RUN: %clang_cc1 %s -fsyntax-only -x c++ -pedantic -verify=expected,ext -Wundef -ftrigraphs -DTRIGRAPHS=1
// RUN: not %clang_cc1 %s -fsyntax-only -std=c99 -pedantic -Wundef 2>&1 | FileCheck -strict-whitespace %s
// ext-warning {{extension}} cxx2b-warning {{before C++2b}}
#define \N{WASTEBASKET} // expected-error {{macro name must be an identifier}} \
// ext-warning {{extension}} cxx2b-warning {{before C++2b}}
-
#define a\u0024
#if \u0110 // expected-warning {{is not defined, evaluates to 0}}
#define \u{123456789} // expected-error {{hex escape sequence out of range}} expected-error {{macro name must be an identifier}}
#define \u{ // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
#define \u{fgh} // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
-#define \N{ // expected-warning {{incomplete delimited universal character name; treating as '\' 'N' '{' identifier}} expected-error {{macro name must be an identifier}}
+#define \N{
+// expected-warning@-1 {{incomplete delimited universal character name; treating as '\' 'N' '{' identifier}}
+// expected-error@-2 {{macro name must be an identifier}}
#define \N{} // expected-warning {{empty delimited universal character name; treating as '\' 'N' '{' '}'}} expected-error {{macro name must be an identifier}}
#define \N{NOTATHING} // expected-error {{'NOTATHING' is not a valid Unicode character name}} \
// expected-error {{macro name must be an identifier}}
#define \NN // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} expected-error {{macro name must be an identifier}}
#define \N{GREEK_SMALL-LETTERALPHA} // expected-error {{'GREEK_SMALL-LETTERALPHA' is not a valid Unicode character name}} \
// expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespaces}}
+#define \N{🤡} // expected-error {{'🤡' is not a valid Unicode character name}} \
+ // expected-error {{macro name must be an identifier}}
#define CONCAT(A, B) A##B
-int CONCAT(\N{GREEK, CAPITALLETTERALPHA}); // expected-error{{expected}} \
- // expected-warning {{incomplete delimited universal character name}}
+int CONCAT(\N{GREEK
+, CAPITALLETTERALPHA});
+// expected-error@-2 {{expected}} \
+// expected-warning@-2 {{incomplete delimited universal character name}}
+
+int \N{\
+LATIN CAPITAL LETTER A WITH GRAVE};
+//ext-warning@-2 {{extension}} cxx2b-warning@-2 {{before C++2b}}
#ifdef TRIGRAPHS
-int \N??<GREEK CAPITAL LETTER ALPHA??> = 0; // expected-warning{{extension}} cxx2b-warning {{before C++2b}} \
+int \N??<GREEK CAPITAL LETTER ALPHA??> = 0; // cxx2b-warning {{before C++2b}} \
+ //ext-warning {{extension}}\
// expected-warning 2{{trigraph converted}}
+int a\N{LATIN CAPITAL LETTER A WITH GRAVE??>; // expected-warning {{trigraph converted}}
+#endif
+
+#ifndef TRIGRAPHS
+int a\N{LATIN CAPITAL LETTER A WITH GRAVE??>;
+// expected-warning@-1 {{trigraph ignored}}\
+// expected-warning@-1 {{incomplete}}\
+// expected-error@-1 {{expected ';' after top level declarator}}
#endif