From 8110c9df221ed2e7c3b9652092cdba825d7cd91e Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 29 Nov 2016 19:45:17 +0000 Subject: [PATCH] Support constant expression evaluation for wchar_t versions of simple string functions, in order to support constexpr std::char_traits. llvm-svn: 288193 --- clang/include/clang/Basic/Builtins.def | 16 +++++ clang/lib/AST/ASTContext.cpp | 4 ++ clang/lib/AST/ExprConstant.cpp | 102 +++++++++++++++++++++---------- clang/test/SemaCXX/constexpr-string.cpp | 103 ++++++++++++++++++++++++++++++-- 4 files changed, 191 insertions(+), 34 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index de5332c..731d856 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -29,6 +29,7 @@ // f -> float // d -> double // z -> size_t +// w -> wchar_t // F -> constant CFString // G -> id // H -> SEL @@ -456,6 +457,12 @@ BUILTIN(__builtin_strpbrk, "c*cC*cC*", "nF") BUILTIN(__builtin_strrchr, "c*cC*i", "nF") BUILTIN(__builtin_strspn, "zcC*cC*", "nF") BUILTIN(__builtin_strstr, "c*cC*cC*", "nF") +BUILTIN(__builtin_wcschr, "w*wC*w", "nF") +BUILTIN(__builtin_wcscmp, "iwC*wC*", "nF") +BUILTIN(__builtin_wcslen, "zwC*", "nF") +BUILTIN(__builtin_wcsncmp, "iwC*wC*z", "nF") +BUILTIN(__builtin_wmemchr, "w*wC*wz", "nF") +BUILTIN(__builtin_wmemcmp, "iwC*wC*z", "nF") BUILTIN(__builtin_return_address, "v*IUi", "n") BUILTIN(__builtin_extract_return_addr, "v*v*", "n") BUILTIN(__builtin_frame_address, "v*IUi", "n") @@ -830,6 +837,15 @@ LIBBUILTIN(isupper, "ii", "fnU", "ctype.h", ALL_LANGUAGES) LIBBUILTIN(isxdigit, "ii", "fnU", "ctype.h", ALL_LANGUAGES) LIBBUILTIN(tolower, "ii", "fnU", "ctype.h", ALL_LANGUAGES) LIBBUILTIN(toupper, "ii", "fnU", "ctype.h", ALL_LANGUAGES) +// C99 wchar.h +// FIXME: This list is incomplete. We should cover at least the functions that +// take format strings. +LIBBUILTIN(wcschr, "w*wC*w", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wcscmp, "iwC*wC*", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wcslen, "zwC*", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wcsncmp, "iwC*wC*z", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wmemchr, "w*wC*wz", "f", "wchar.h", ALL_LANGUAGES) +LIBBUILTIN(wmemcmp, "iwC*wC*z", "f", "wchar.h", ALL_LANGUAGES) // C99 // In some systems setjmp is a macro that expands to _setjmp. We undefine diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 26c7938..62d9b65 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -8554,6 +8554,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'z'!"); Type = Context.getSizeType(); break; + case 'w': // wchar_t. + assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'w'!"); + Type = Context.getWideCharType(); + break; case 'F': Type = Context.getCFConstantStringType(); break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7fd8177..e212928 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -5346,16 +5346,20 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } case Builtin::BIstrchr: + case Builtin::BIwcschr: case Builtin::BImemchr: + case Builtin::BIwmemchr: if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 - << (BuiltinOp == Builtin::BIstrchr ? "'strchr'" : "'memchr'"); + << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. case Builtin::BI__builtin_strchr: - case Builtin::BI__builtin_memchr: { + case Builtin::BI__builtin_wcschr: + case Builtin::BI__builtin_memchr: + case Builtin::BI__builtin_wmemchr: { if (!Visit(E->getArg(0))) return false; APSInt Desired; @@ -5363,29 +5367,51 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrchr && - BuiltinOp != Builtin::BI__builtin_strchr) { + BuiltinOp != Builtin::BIwcschr && + BuiltinOp != Builtin::BI__builtin_strchr && + BuiltinOp != Builtin::BI__builtin_wcschr) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } - QualType CharTy = Info.Ctx.CharTy; - bool IsStrchr = (BuiltinOp != Builtin::BImemchr && - BuiltinOp != Builtin::BI__builtin_memchr); - - // strchr compares directly to the passed integer, and therefore - // always fails if given an int that is not a char. - if (IsStrchr && - !APSInt::isSameValue(HandleIntToIntCast(Info, E, CharTy, - E->getArg(1)->getType(), - Desired), - Desired)) - return ZeroInitialization(E); + QualType CharTy = E->getArg(0)->getType()->getPointeeType(); + + // Figure out what value we're actually looking for (after converting to + // the corresponding unsigned type if necessary). + uint64_t DesiredVal; + bool StopAtNull = false; + switch (BuiltinOp) { + case Builtin::BIstrchr: + case Builtin::BI__builtin_strchr: + // strchr compares directly to the passed integer, and therefore + // always fails if given an int that is not a char. + if (!APSInt::isSameValue(HandleIntToIntCast(Info, E, CharTy, + E->getArg(1)->getType(), + Desired), + Desired)) + return ZeroInitialization(E); + StopAtNull = true; + // Fall through. + case Builtin::BImemchr: + case Builtin::BI__builtin_memchr: + // memchr compares by converting both sides to unsigned char. That's also + // correct for strchr if we get this far (to cope with plain char being + // unsigned in the strchr case). + DesiredVal = Desired.trunc(Info.Ctx.getCharWidth()).getZExtValue(); + break; - // memchr compares by converting both sides to unsigned char. That's also - // correct for strchr if we get this far. - uint64_t DesiredVal = Desired.trunc(Info.Ctx.getCharWidth()).getZExtValue(); + case Builtin::BIwcschr: + case Builtin::BI__builtin_wcschr: + StopAtNull = true; + // Fall through. + case Builtin::BIwmemchr: + case Builtin::BI__builtin_wmemchr: + // wcschr and wmemchr are given a wchar_t to look for. Just use it. + DesiredVal = Desired.getZExtValue(); + break; + } for (; MaxLength; --MaxLength) { APValue Char; @@ -5394,7 +5420,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; if (Char.getInt().getZExtValue() == DesiredVal) return true; - if (IsStrchr && !Char.getInt()) + if (StopAtNull && !Char.getInt()) break; if (!HandleLValueArrayAdjustment(Info, E, Result, CharTy, 1)) return false; @@ -7117,20 +7143,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } case Builtin::BIstrlen: + case Builtin::BIwcslen: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) - << /*isConstexpr*/0 << /*isConstructor*/0 << "'strlen'"; + << /*isConstexpr*/0 << /*isConstructor*/0 + << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. - case Builtin::BI__builtin_strlen: { + case Builtin::BI__builtin_strlen: + case Builtin::BI__builtin_wcslen: { // As an extension, we support __builtin_strlen() as a constant expression, // and support folding strlen() to a constant. LValue String; if (!EvaluatePointer(E->getArg(0), String, Info)) return false; + QualType CharTy = E->getArg(0)->getType()->getPointeeType(); + // Fast path: if it's a string literal, search the string value. if (const StringLiteral *S = dyn_cast_or_null( String.getLValueBase().dyn_cast())) { @@ -7139,7 +7170,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, StringRef Str = S->getBytes(); int64_t Off = String.Offset.getQuantity(); if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() && - S->getCharByteWidth() == 1) { + S->getCharByteWidth() == 1 && + // FIXME: Add fast-path for wchar_t too. + Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { Str = Str.substr(Off); StringRef::size_type Pos = Str.find(0); @@ -7153,7 +7186,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } // Slow path: scan the bytes of the string looking for the terminating 0. - QualType CharTy = Info.Ctx.CharTy; for (uint64_t Strlen = 0; /**/; ++Strlen) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || @@ -7167,36 +7199,46 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } case Builtin::BIstrcmp: + case Builtin::BIwcscmp: case Builtin::BIstrncmp: + case Builtin::BIwcsncmp: case Builtin::BImemcmp: + case Builtin::BIwmemcmp: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 - << (BuiltinOp == Builtin::BIstrncmp ? "'strncmp'" : - BuiltinOp == Builtin::BImemcmp ? "'memcmp'" : - "'strcmp'"); + << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. case Builtin::BI__builtin_strcmp: + case Builtin::BI__builtin_wcscmp: case Builtin::BI__builtin_strncmp: - case Builtin::BI__builtin_memcmp: { + case Builtin::BI__builtin_wcsncmp: + case Builtin::BI__builtin_memcmp: + case Builtin::BI__builtin_wmemcmp: { LValue String1, String2; if (!EvaluatePointer(E->getArg(0), String1, Info) || !EvaluatePointer(E->getArg(1), String2, Info)) return false; + + QualType CharTy = E->getArg(0)->getType()->getPointeeType(); + uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrcmp && - BuiltinOp != Builtin::BI__builtin_strcmp) { + BuiltinOp != Builtin::BIwcscmp && + BuiltinOp != Builtin::BI__builtin_strcmp && + BuiltinOp != Builtin::BI__builtin_wcscmp) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } bool StopAtNull = (BuiltinOp != Builtin::BImemcmp && - BuiltinOp != Builtin::BI__builtin_memcmp); - QualType CharTy = Info.Ctx.CharTy; + BuiltinOp != Builtin::BIwmemcmp && + BuiltinOp != Builtin::BI__builtin_memcmp && + BuiltinOp != Builtin::BI__builtin_wmemcmp); for (; MaxLength; --MaxLength) { APValue Char1, Char2; if (!handleLValueToRValueConversion(Info, E, CharTy, String1, Char1) || diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp index 0ce3b05..944038b 100644 --- a/clang/test/SemaCXX/constexpr-string.cpp +++ b/clang/test/SemaCXX/constexpr-string.cpp @@ -1,7 +1,8 @@ // RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic // RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic -fno-signed-char +// RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic -fno-wchar -Dwchar_t=__WCHAR_TYPE__ -# 4 "/usr/include/string.h" 1 3 4 +# 6 "/usr/include/string.h" 1 3 4 extern "C" { typedef decltype(sizeof(int)) size_t; @@ -9,20 +10,38 @@ extern "C" { extern int strcmp(const char *s1, const char *s2); extern int strncmp(const char *s1, const char *s2, size_t n); - extern int memcmp(const char *s1, const char *s2, size_t n); // expected-note {{here}} + extern int memcmp(const void *s1, const void *s2, size_t n); extern char *strchr(const char *s, int c); extern void *memchr(const void *s, int c, size_t n); } - # 19 "SemaCXX/constexpr-string.cpp" 2 + +# 21 "/usr/include/wchar.h" 1 3 4 +extern "C" { + extern size_t wcslen(const wchar_t *p); + + extern int wcscmp(const wchar_t *s1, const wchar_t *s2); + extern int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n); + extern int wmemcmp(const wchar_t *s1, const wchar_t *s2, size_t n); + + extern wchar_t *wcschr(const wchar_t *s, wchar_t c); + extern wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n); +} + +# 33 "SemaCXX/constexpr-string.cpp" 2 namespace Strlen { constexpr int n = __builtin_strlen("hello"); // ok + static_assert(n == 5); + constexpr int wn = __builtin_wcslen(L"hello"); // ok + static_assert(wn == 5); constexpr int m = strlen("hello"); // expected-error {{constant expression}} expected-note {{non-constexpr function 'strlen' cannot be used in a constant expression}} + constexpr int wm = wcslen(L"hello"); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wcslen' cannot be used in a constant expression}} // Make sure we can evaluate a call to strlen. - int arr[3]; // expected-note {{here}} + int arr[3]; // expected-note 2{{here}} int k = arr[strlen("hello")]; // expected-warning {{array index 5}} + int wk = arr[wcslen(L"hello")]; // expected-warning {{array index 5}} } namespace StrcmpEtc { @@ -71,6 +90,52 @@ namespace StrcmpEtc { constexpr int c = memcmp("hello", "world", 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'memcmp' cannot be used in a constant expression}} } +namespace WcscmpEtc { + constexpr wchar_t kFoobar[6] = {L'f',L'o',L'o',L'b',L'a',L'r'}; + constexpr wchar_t kFoobazfoobar[12] = {L'f',L'o',L'o',L'b',L'a',L'z',L'f',L'o',L'o',L'b',L'a',L'r'}; + + static_assert(__builtin_wcscmp(L"abab", L"abab") == 0); + static_assert(__builtin_wcscmp(L"abab", L"abba") == -1); + static_assert(__builtin_wcscmp(L"abab", L"abaa") == 1); + static_assert(__builtin_wcscmp(L"ababa", L"abab") == 1); + static_assert(__builtin_wcscmp(L"abab", L"ababa") == -1); + static_assert(__builtin_wcscmp(L"abab\0banana", L"abab") == 0); + static_assert(__builtin_wcscmp(L"abab", L"abab\0banana") == 0); + static_assert(__builtin_wcscmp(L"abab\0banana", L"abab\0canada") == 0); + static_assert(__builtin_wcscmp(0, L"abab") == 0); // expected-error {{not an integral constant}} expected-note {{dereferenced null}} + static_assert(__builtin_wcscmp(L"abab", 0) == 0); // expected-error {{not an integral constant}} expected-note {{dereferenced null}} + + static_assert(__builtin_wcscmp(kFoobar, kFoobazfoobar) == -1); // FIXME: Should we reject this? + static_assert(__builtin_wcscmp(kFoobar, kFoobazfoobar + 6) == 0); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 5) == -1); + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 4) == -1); + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 3) == -1); + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 2) == 0); + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 1) == 0); + static_assert(__builtin_wcsncmp(L"abaa", L"abba", 0) == 0); + static_assert(__builtin_wcsncmp(0, 0, 0) == 0); + static_assert(__builtin_wcsncmp(L"abab\0banana", L"abab\0canada", 100) == 0); + + static_assert(__builtin_wcsncmp(kFoobar, kFoobazfoobar, 6) == -1); + static_assert(__builtin_wcsncmp(kFoobar, kFoobazfoobar, 7) == -1); // FIXME: Should we reject this? + static_assert(__builtin_wcsncmp(kFoobar, kFoobazfoobar + 6, 6) == 0); + static_assert(__builtin_wcsncmp(kFoobar, kFoobazfoobar + 6, 7) == 0); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + + static_assert(__builtin_wmemcmp(L"abaa", L"abba", 3) == -1); + static_assert(__builtin_wmemcmp(L"abaa", L"abba", 2) == 0); + static_assert(__builtin_wmemcmp(0, 0, 0) == 0); + static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0banana", 100) == 0); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 100) == -1); // FIXME: Should we reject this? + static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 7) == -1); + static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1); + static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0); + + constexpr int a = wcscmp(L"hello", L"world"); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wcscmp' cannot be used in a constant expression}} + constexpr int b = wcsncmp(L"hello", L"world", 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wcsncmp' cannot be used in a constant expression}} + constexpr int c = wmemcmp(L"hello", L"world", 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wmemcmp' cannot be used in a constant expression}} +} + namespace StrchrEtc { constexpr const char *kStr = "abca\xff\0d"; constexpr char kFoo[] = {'f', 'o', 'o'}; @@ -104,3 +169,33 @@ namespace StrchrEtc { constexpr bool a = !strchr("hello", 'h'); // expected-error {{constant expression}} expected-note {{non-constexpr function 'strchr' cannot be used in a constant expression}} constexpr bool b = !memchr("hello", 'h', 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'memchr' cannot be used in a constant expression}} } + +namespace WcschrEtc { + constexpr const wchar_t *kStr = L"abca\xffff\0dL"; + constexpr wchar_t kFoo[] = {L'f', L'o', L'o'}; + static_assert(__builtin_wcschr(kStr, L'a') == kStr); + static_assert(__builtin_wcschr(kStr, L'b') == kStr + 1); + static_assert(__builtin_wcschr(kStr, L'c') == kStr + 2); + static_assert(__builtin_wcschr(kStr, L'd') == nullptr); + static_assert(__builtin_wcschr(kStr, L'e') == nullptr); + static_assert(__builtin_wcschr(kStr, L'\0') == kStr + 5); + static_assert(__builtin_wcschr(kStr, L'a' + 256) == nullptr); + static_assert(__builtin_wcschr(kStr, L'a' - 256) == nullptr); + static_assert(__builtin_wcschr(kStr, L'\xffff') == kStr + 4); + static_assert(__builtin_wcschr(kFoo, L'o') == kFoo + 1); + static_assert(__builtin_wcschr(kFoo, L'x') == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + static_assert(__builtin_wcschr(nullptr, L'x') == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced null}} + + static_assert(__builtin_wmemchr(kStr, L'a', 0) == nullptr); + static_assert(__builtin_wmemchr(kStr, L'a', 1) == kStr); + static_assert(__builtin_wmemchr(kStr, L'\0', 5) == nullptr); + static_assert(__builtin_wmemchr(kStr, L'\0', 6) == kStr + 5); + static_assert(__builtin_wmemchr(kStr, L'\xffff', 8) == kStr + 4); + static_assert(__builtin_wmemchr(kFoo, L'x', 3) == nullptr); + static_assert(__builtin_wmemchr(kFoo, L'x', 4) == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + static_assert(__builtin_wmemchr(nullptr, L'x', 3) == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced null}} + static_assert(__builtin_wmemchr(nullptr, L'x', 0) == nullptr); // FIXME: Should we reject this? + + constexpr bool a = !wcschr(L"hello", L'h'); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wcschr' cannot be used in a constant expression}} + constexpr bool b = !wmemchr(L"hello", L'h', 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wmemchr' cannot be used in a constant expression}} +} -- 2.7.4