From: Tamás Zolnai Date: Sat, 14 Mar 2020 16:57:02 +0000 (+0100) Subject: [clang-tidy] extend bugprone-signed-char-misuse check. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=04410c565aa08b703ef5d11b454e7fba47163e3c;p=platform%2Fupstream%2Fllvm.git [clang-tidy] extend bugprone-signed-char-misuse check. Summary: Cover a new use case when using a 'signed char' as an integer might lead to issue with non-ASCII characters. Comparing a 'signed char' with an 'unsigned char' using equality / unequality operator produces an unexpected result for non-ASCII characters. Reviewers: aaron.ballman, alexfh, hokein, njames93 Reviewed By: njames93 Subscribers: xazax.hun, cfe-commits Tags: #clang, #clang-tools-extra Differential Revision: https://reviews.llvm.org/D75749 --- diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp index 32949a8..732ccbc 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -18,6 +18,8 @@ namespace clang { namespace tidy { namespace bugprone { +static constexpr int UnsignedASCIIUpperBound = 127; + static Matcher hasAnyListedName(const std::string &Names) { const std::vector NameList = utils::options::parseStringList(Names); @@ -33,25 +35,29 @@ void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); } -void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { +// Create a matcher for char -> integer cast. +BindableMatcher SignedCharMisuseCheck::charCastExpression( + bool IsSigned, const Matcher &IntegerType, + const std::string &CastBindName) const { // We can ignore typedefs which are some kind of integer types // (e.g. typedef char sal_Int8). In this case, we don't need to // worry about the misinterpretation of char values. const auto IntTypedef = qualType( hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList)))); - const auto SignedCharType = expr(hasType(qualType( - allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))))); - - const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()), - unless(booleanType()))) - .bind("integerType"); + auto CharTypeExpr = expr(); + if (IsSigned) { + CharTypeExpr = expr(hasType( + qualType(isAnyCharacter(), isSignedInteger(), unless(IntTypedef)))); + } else { + CharTypeExpr = expr(hasType(qualType( + isAnyCharacter(), unless(isSignedInteger()), unless(IntTypedef)))); + } - // We are interested in signed char -> integer conversion. const auto ImplicitCastExpr = - implicitCastExpr(hasSourceExpression(SignedCharType), + implicitCastExpr(hasSourceExpression(CharTypeExpr), hasImplicitDestinationType(IntegerType)) - .bind("castExpression"); + .bind(CastBindName); const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); @@ -59,44 +65,84 @@ void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { // We catch any type of casts to an integer. We need to have these cast // expressions explicitly to catch only those casts which are direct children - // of an assignment/declaration. - const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr, - StaticCastExpr, FunctionalCastExpr)); + // of the checked expressions. (e.g. assignment, declaration). + return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr, + FunctionalCastExpr)); +} - // Catch assignments with the suspicious type conversion. - const auto AssignmentOperatorExpr = expr(binaryOperator( - hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr))); +void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { + const auto IntegerType = + qualType(isInteger(), unless(isAnyCharacter()), unless(booleanType())) + .bind("integerType"); + const auto SignedCharCastExpr = + charCastExpression(true, IntegerType, "signedCastExpression"); + const auto UnSignedCharCastExpr = + charCastExpression(false, IntegerType, "unsignedCastExpression"); + + // Catch assignments with singed char -> integer conversion. + const auto AssignmentOperatorExpr = + expr(binaryOperator(hasOperatorName("="), hasLHS(hasType(IntegerType)), + hasRHS(SignedCharCastExpr))); Finder->addMatcher(AssignmentOperatorExpr, this); - // Catch declarations with the suspicious type conversion. - const auto Declaration = - varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr)); + // Catch declarations with singed char -> integer conversion. + const auto Declaration = varDecl(isDefinition(), hasType(IntegerType), + hasInitializer(SignedCharCastExpr)); Finder->addMatcher(Declaration, this); + + // Catch signed char/unsigned char comparison. + const auto CompareOperator = + expr(binaryOperator(hasAnyOperatorName("==", "!="), + anyOf(allOf(hasLHS(SignedCharCastExpr), + hasRHS(UnSignedCharCastExpr)), + allOf(hasLHS(UnSignedCharCastExpr), + hasRHS(SignedCharCastExpr))))) + .bind("comparison"); + + Finder->addMatcher(CompareOperator, this); } void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) { - const auto *CastExpression = - Result.Nodes.getNodeAs("castExpression"); - const auto *IntegerType = Result.Nodes.getNodeAs("integerType"); - assert(CastExpression); - assert(IntegerType); + const auto *SignedCastExpression = + Result.Nodes.getNodeAs("signedCastExpression"); - // Ignore the match if we know that the value is not negative. + // Ignore the match if we know that the signed char's value is not negative. // The potential misinterpretation happens for negative values only. Expr::EvalResult EVResult; - if (!CastExpression->isValueDependent() && - CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) { - llvm::APSInt Value1 = EVResult.Val.getInt(); - if (Value1.isNonNegative()) + if (!SignedCastExpression->isValueDependent() && + SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult, + *Result.Context)) { + llvm::APSInt Value = EVResult.Val.getInt(); + if (Value.isNonNegative()) return; } - diag(CastExpression->getBeginLoc(), - "'signed char' to %0 conversion; " - "consider casting to 'unsigned char' first.") - << *IntegerType; + if (const auto *Comparison = Result.Nodes.getNodeAs("comparison")) { + const auto *UnSignedCastExpression = + Result.Nodes.getNodeAs("unsignedCastExpression"); + + // We can ignore the ASCII value range also for unsigned char. + Expr::EvalResult EVResult; + if (!UnSignedCastExpression->isValueDependent() && + UnSignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult, + *Result.Context)) { + llvm::APSInt Value = EVResult.Val.getInt(); + if (Value <= UnsignedASCIIUpperBound) + return; + } + + diag(Comparison->getBeginLoc(), + "comparison between 'signed char' and 'unsigned char'"); + } else if (const auto *IntegerType = + Result.Nodes.getNodeAs("integerType")) { + diag(SignedCastExpression->getBeginLoc(), + "'signed char' to %0 conversion; " + "consider casting to 'unsigned char' first.") + << *IntegerType; + } else + llvm_unreachable("Unexpected match"); } } // namespace bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h index 4ce9895..3fa0c9c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h @@ -15,13 +15,11 @@ namespace clang { namespace tidy { namespace bugprone { -/// Finds ``signed char`` -> integer conversions which might indicate a programming -/// error. The basic problem with the ``signed char``, that it might store the -/// non-ASCII characters as negative values. The human programmer probably -/// expects that after an integer conversion the converted value matches with the -/// character code (a value from [0..255]), however, the actual value is in -/// [-128..127] interval. This also applies to the plain ``char`` type on -/// those implementations which represent ``char`` similar to ``signed char``. +/// Finds those ``signed char`` -> integer conversions which might indicate a +/// programming error. The basic problem with the ``signed char``, that it might +/// store the non-ASCII characters as negative values. This behavior can cause a +/// misunderstanding of the written code both when an explicit and when an +/// implicit conversion happens. /// /// For the user-facing documentation see: /// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html @@ -34,6 +32,11 @@ public: void check(const ast_matchers::MatchFinder::MatchResult &Result) override; private: + ast_matchers::internal::BindableMatcher charCastExpression( + bool IsSigned, + const ast_matchers::internal::Matcher &IntegerType, + const std::string &CastBindName) const; + const std::string CharTypdefsToIgnoreList; }; diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst index 20187b1..e3ecf75 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst @@ -3,27 +3,39 @@ bugprone-signed-char-misuse =========================== -Finds ``signed char`` -> integer conversions which might indicate a programming -error. The basic problem with the ``signed char``, that it might store the -non-ASCII characters as negative values. The human programmer probably -expects that after an integer conversion the converted value matches with the +Finds those ``signed char`` -> integer conversions which might indicate a +programming error. The basic problem with the ``signed char``, that it might +store the non-ASCII characters as negative values. This behavior can cause a +misunderstanding of the written code both when an explicit and when an +implicit conversion happens. + +When the code contains an explicit ``signed char`` -> integer conversion, the +human programmer probably expects that the converted value matches with the character code (a value from [0..255]), however, the actual value is in -[-128..127] interval. This also applies to the plain ``char`` type on -those implementations which represent ``char`` similar to ``signed char``. - -To avoid this kind of misinterpretation, the desired way of converting from a -``signed char`` to an integer value is converting to ``unsigned char`` first, -which stores all the characters in the positive [0..255] interval which matches -with the known character codes. - -It depends on the actual platform whether ``char`` is handled as ``signed char`` +[-128..127] interval. To avoid this kind of misinterpretation, the desired way +of converting from a ``signed char`` to an integer value is converting to +``unsigned char`` first, which stores all the characters in the positive [0..255] +interval which matches the known character codes. + +In case of implicit conversion, the programmer might not actually be aware +that a conversion happened and char value is used as an integer. There are +some use cases when this unawareness might lead to a functionally imperfect code. +For example, checking the equality of a ``signed char`` and an ``unsigned char`` +variable is something we should avoid in C++ code. During this comparison, +the two variables are converted to integers which have different value ranges. +For ``signed char``, the non-ASCII characters are stored as a value in [-128..-1] +interval, while the same characters are stored in the [128..255] interval for +an ``unsigned char``. + +It depends on the actual platform whether plain ``char`` is handled as ``signed char`` by default and so it is caught by this check or not. To change the default behavior you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options. Currently, this check is limited to assignments and variable declarations, -where a ``signed char`` is assigned to an integer variable. There are other -use cases where the same misinterpretation might lead to similar bogus -behavior. +where a ``signed char`` is assigned to an integer variable and to +equality/inequality comparisons between ``signed char`` and ``unsigned char``. +There are other use cases where the unexpected value ranges might lead to +similar bogus behavior. See also: `STR34-C. Cast characters to unsigned char before converting to larger integer sizes @@ -67,6 +79,29 @@ an ``unsigned char`` value first. return IChar; } +Another use case is checking the equality of two ``char`` variables with +different signedness. Inside the non-ASCII value range this comparison between +a ``signed char`` and an ``unsigned char`` always returns ``false``. + +.. code-block:: c++ + + bool compare(signed char SChar, unsigned char USChar) { + if (SChar == USChar) + return true; + return false; + } + +The easiest way to fix this kind of comparison is casting one of the arguments, +so both arguments will have the same type. + +.. code-block:: c++ + + bool compare(signed char SChar, unsigned char USChar) { + if (static_cast(SChar) == USChar) + return true; + return false; + } + .. option:: CharTypdefsToIgnore A semicolon-separated list of typedef names. In this list, we can list diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp index f3f1a1d..ef42b0c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp @@ -62,6 +62,34 @@ int CharPointer(signed char *CCharacter) { return NCharacter; } +int SignedUnsignedCharEquality(signed char SCharacter) { + unsigned char USCharacter = 'a'; + if (SCharacter == USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse] + return 1; + return 0; +} + +int SignedUnsignedCharIneqiality(signed char SCharacter) { + unsigned char USCharacter = 'a'; + if (SCharacter != USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse] + return 1; + return 0; +} + +int CompareWithNonAsciiConstant(unsigned char USCharacter) { + const signed char SCharacter = -5; + if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse] + return 1; + return 0; +} + +int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) { + const unsigned char USCharacter = 128; + if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse] + return 1; + return 0; +} + /////////////////////////////////////////////////////////////////// /// Test cases correctly ignored by the check. @@ -121,3 +149,61 @@ unsigned char CharToCharCast() { return USCharacter; } + +int FixComparisonWithSignedCharCast(signed char SCharacter) { + unsigned char USCharacter = 'a'; + if (SCharacter == static_cast(USCharacter)) + return 1; + return 0; +} + +int FixComparisonWithUnSignedCharCast(signed char SCharacter) { + unsigned char USCharacter = 'a'; + if (static_cast(SCharacter) == USCharacter) + return 1; + return 0; +} + +// Make sure we don't catch other type of char comparison. +int SameCharTypeComparison(signed char SCharacter) { + signed char SCharacter2 = 'a'; + if (SCharacter == SCharacter2) + return 1; + return 0; +} + +// Make sure we don't catch other type of char comparison. +int SameCharTypeComparison2(unsigned char USCharacter) { + unsigned char USCharacter2 = 'a'; + if (USCharacter == USCharacter2) + return 1; + return 0; +} + +// Make sure we don't catch integer - char comparison. +int CharIntComparison(signed char SCharacter) { + int ICharacter = 10; + if (SCharacter == ICharacter) + return 1; + return 0; +} + +int CompareWithAsciiLiteral(unsigned char USCharacter) { + if (USCharacter == 'x') // no warning + return 1; + return 0; +} + +int CompareWithAsciiConstant(unsigned char USCharacter) { + const signed char SCharacter = 'a'; + if (USCharacter == SCharacter) + return 1; + return 0; +} + +int CompareWithUnsignedAsciiConstant(signed char SCharacter) { + const unsigned char USCharacter = 'a'; + if (USCharacter == SCharacter) + return 1; + return 0; +}