From 6f1da6e345ccf89a7a0d0bc964bee7f1eb7ed9f6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 26 Jul 2018 15:31:41 +0000 Subject: [PATCH] [ADT] Replace std::isprint by llvm::isPrint. The standard library functions ::isprint/std::isprint have platform- and locale-dependent behavior which makes LLVM's output less predictable. In particular, regression tests my fail depending on the implementation of these functions. Implement llvm::isPrint in StringExtras.h with a standard behavior and replace all uses of ::isprint/std::isprint by a call it llvm::isPrint. The function is inlined and does not look up language settings so it should perform better than the standard library's version. Such a replacement has already been done for isdigit, isalpha, isxdigit in r314883. gtest does the same in gtest-printers.cc using the following justification: // Returns true if c is a printable ASCII character. We test the // value of c directly instead of calling isprint(), which is buggy on // Windows Mobile. inline bool IsPrintableAscii(wchar_t c) { return 0x20 <= c && c <= 0x7E; } Similar issues have also been encountered by Julia: https://github.com/JuliaLang/julia/issues/7416 I noticed the problem myself when on Windows isprint('\t') started to evaluate to true (see https://stackoverflow.com/questions/51435249) and thus caused several unit tests to fail. The result of isprint doesn't seem to be well-defined even for ASCII characters. Therefore I suggest to replace isprint by a platform-independent version. Differential Revision: https://reviews.llvm.org/D49680 llvm-svn: 338034 --- llvm/include/llvm/ADT/StringExtras.h | 9 +++++++++ llvm/include/llvm/Support/raw_ostream.h | 2 +- llvm/lib/MC/MCAsmStreamer.cpp | 2 +- llvm/lib/ProfileData/InstrProfReader.cpp | 2 +- llvm/lib/Support/StringExtras.cpp | 2 +- llvm/lib/Support/raw_ostream.cpp | 4 ++-- llvm/lib/Support/regengine.inc | 2 +- llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 4 ++-- llvm/tools/llvm-objdump/llvm-objdump.cpp | 4 ++-- llvm/tools/llvm-readobj/ObjDumper.cpp | 4 ++-- llvm/unittests/ADT/StringExtrasTest.cpp | 18 ++++++++++++++++++ 11 files changed, 40 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index b1596c5..71b0e75 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -99,6 +99,15 @@ inline bool isASCII(llvm::StringRef S) { return true; } +/// Checks whether character \p C is printable. +/// +/// Locale-independent version of the C standard library isprint whose results +/// may differ on different platforms. +inline bool isPrint(char C) { + unsigned char UC = static_cast(C); + return (0x20 <= UC) && (UC <= 0x7E); +} + /// Returns the corresponding lowercase character if \p x is uppercase. inline char toLower(char x) { if (x >= 'A' && x <= 'Z') diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index e54c8cc..b9ea9b5 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -220,7 +220,7 @@ public: raw_ostream &write_uuid(const uuid_t UUID); /// Output \p Str, turning '\\', '\t', '\n', '"', and anything that doesn't - /// satisfy std::isprint into an escape sequence. + /// satisfy llvm::isPrint into an escape sequence. raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false); raw_ostream &write(unsigned char C); diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 1429f64..88154bd 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -815,7 +815,7 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) { continue; } - if (isprint((unsigned char)C)) { + if (isPrint((unsigned char)C)) { OS << (char)C; continue; } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 64f98ad..3b70415 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -129,7 +129,7 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { StringRef buffer = Buffer.getBufferStart(); return count == 0 || std::all_of(buffer.begin(), buffer.begin() + count, - [](char c) { return ::isprint(c) || ::isspace(c); }); + [](char c) { return isPrint(c) || ::isspace(c); }); } // Read the profile variant flag from the header: ":FE" means this is a FE diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index d566b72..386d74a 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -61,7 +61,7 @@ void llvm::SplitString(StringRef Source, void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; - if (isprint(C) && C != '\\' && C != '"') + if (isPrint(C) && C != '\\' && C != '"') Out << C; else Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index b2260c1..1dae469 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -160,7 +160,7 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str, *this << '\\' << '"'; break; default: - if (std::isprint(c)) { + if (isPrint(c)) { *this << c; break; } @@ -436,7 +436,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) { // Print the ASCII char values for each byte on this line for (uint8_t Byte : Line) { - if (isprint(Byte)) + if (isPrint(Byte)) *this << static_cast(Byte); else *this << '.'; diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc index 62d8c26..41787af 100644 --- a/llvm/lib/Support/regengine.inc +++ b/llvm/lib/Support/regengine.inc @@ -1013,7 +1013,7 @@ pchar(int ch) { static char pbuf[10]; - if (isprint(ch) || ch == ' ') + if (isPrint(ch) || ch == ' ') (void)snprintf(pbuf, sizeof pbuf, "%c", ch); else (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 4551718..1939dc6 100644 --- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -699,7 +699,7 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, std::string Str; bool ArrayIsPrintable = true; for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { - if (!isprint(static_cast(Record[j]))) { + if (!isPrint(static_cast(Record[j]))) { ArrayIsPrintable = false; break; } @@ -719,7 +719,7 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, } else { bool BlobIsPrintable = true; for (unsigned i = 0, e = Blob.size(); i != e; ++i) - if (!isprint(static_cast(Blob[i]))) { + if (!isPrint(static_cast(Blob[i]))) { BlobIsPrintable = false; break; } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 610cd11..8041e6f 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1651,7 +1651,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } Byte = Bytes.slice(Index)[0]; outs() << format(" %02x", Byte); - AsciiData[NumBytes] = isprint(Byte) ? Byte : '.'; + AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.'; uint8_t IndentOffset = 0; NumBytes++; @@ -1899,7 +1899,7 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) { // Print ascii. outs() << " "; for (std::size_t i = 0; i < 16 && addr + i < end; ++i) { - if (std::isprint(static_cast(Contents[addr + i]) & 0xFF)) + if (isPrint(static_cast(Contents[addr + i]) & 0xFF)) outs() << Contents[addr + i]; else outs() << "."; diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp index 17675fd..a725140 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.cpp +++ b/llvm/tools/llvm-readobj/ObjDumper.cpp @@ -29,7 +29,7 @@ ObjDumper::~ObjDumper() { static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) { for (size_t i = 0; i < Len; i++) - W << (isprint(Start[i]) ? static_cast(Start[i]) : '.'); + W << (isPrint(Start[i]) ? static_cast(Start[i]) : '.'); } static Expected @@ -138,7 +138,7 @@ void ObjDumper::printSectionAsHex(const object::ObjectFile *Obj, TmpSecPtr = SecPtr; for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i) - W.startLine() << (isprint(TmpSecPtr[i]) ? static_cast(TmpSecPtr[i]) + W.startLine() << (isPrint(TmpSecPtr[i]) ? static_cast(TmpSecPtr[i]) : '.'); W.startLine() << '\n'; diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index f98f388..1df2005 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -13,6 +13,17 @@ using namespace llvm; +TEST(StringExtrasTest, isPrint) { + EXPECT_FALSE(isPrint('\0')); + EXPECT_FALSE(isPrint('\t')); + EXPECT_TRUE(isPrint('0')); + EXPECT_TRUE(isPrint('a')); + EXPECT_TRUE(isPrint('A')); + EXPECT_TRUE(isPrint(' ')); + EXPECT_TRUE(isPrint('~')); + EXPECT_TRUE(isPrint('?')); +} + TEST(StringExtrasTest, Join) { std::vector Items; EXPECT_EQ("", join(Items.begin(), Items.end(), " ")); @@ -93,6 +104,13 @@ TEST(StringExtrasTest, printLowerCase) { EXPECT_EQ("abcdefg01234.,&!~`'}\"", OS.str()); } +TEST(StringExtrasTest, printEscapedString) { + std::string str; + raw_string_ostream OS(str); + printEscapedString("ABCdef123&<>\\\"'\t", OS); + EXPECT_EQ("ABCdef123&<>\\5C\\22'\\09", OS.str()); +} + TEST(StringExtrasTest, printHTMLEscaped) { std::string str; raw_string_ostream OS(str); -- 2.7.4