From 9ab99eecc3c614a6fc74f8ee8fab05ef43a0d3e5 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Sat, 20 Feb 2016 20:39:51 +0000 Subject: [PATCH] Lex: Add some unit tests for corrupt header maps Split the implementation of `HeaderMap` into `HeaderMapImpl` so that we can write unit tests that don't depend on the `FileManager`, and then write a few tests that cover the types of corrupt header maps already detected. This also moves type and constant definitions from HeaderMap.cpp to HeaderMapTypes.h so that the test can access them. llvm-svn: 261446 --- clang/include/clang/Lex/HeaderMap.h | 53 +++++++++++------- clang/include/clang/Lex/HeaderMapTypes.h | 43 +++++++++++++++ clang/lib/Lex/HeaderMap.cpp | 76 ++++++++++---------------- clang/unittests/Lex/CMakeLists.txt | 1 + clang/unittests/Lex/HeaderMapTest.cpp | 94 ++++++++++++++++++++++++++++++++ 5 files changed, 199 insertions(+), 68 deletions(-) create mode 100644 clang/include/clang/Lex/HeaderMapTypes.h create mode 100644 clang/unittests/Lex/HeaderMapTest.cpp diff --git a/clang/include/clang/Lex/HeaderMap.h b/clang/include/clang/Lex/HeaderMap.h index b9f215e..d94c4c1 100644 --- a/clang/include/clang/Lex/HeaderMap.h +++ b/clang/include/clang/Lex/HeaderMap.h @@ -26,39 +26,27 @@ class FileManager; struct HMapBucket; struct HMapHeader; -/// This class represents an Apple concept known as a 'header map'. To the -/// \#include file resolution process, it basically acts like a directory of -/// symlinks to files. Its advantages are that it is dense and more efficient -/// to create and process than a directory of symlinks. -class HeaderMap { +/// Implementation for \a HeaderMap that doesn't depend on \a FileManager. +class HeaderMapImpl { std::unique_ptr FileBuffer; bool NeedsBSwap; - HeaderMap(std::unique_ptr File, bool BSwap) - : FileBuffer(std::move(File)), NeedsBSwap(BSwap) {} - public: - /// HeaderMap::Create - This attempts to load the specified file as a header - /// map. 
If it doesn't look like a HeaderMap, it gives up and returns null. - static const HeaderMap *Create(const FileEntry *FE, FileManager &FM); + HeaderMapImpl(std::unique_ptr File, bool NeedsBSwap) + : FileBuffer(std::move(File)), NeedsBSwap(NeedsBSwap) {} - /// LookupFile - Check to see if the specified relative filename is located in - /// this HeaderMap. If so, open it and return its FileEntry. - /// If RawPath is not NULL and the file is found, RawPath will be set to the - /// raw path at which the file was found in the file system. For example, - /// for a search path ".." and a filename "../file.h" this would be - /// "../../file.h". - const FileEntry *LookupFile(StringRef Filename, FileManager &FM) const; + // Check for a valid header and extract the byte swap. + static bool checkHeader(const llvm::MemoryBuffer &File, bool &NeedsByteSwap); /// If the specified relative filename is located in this HeaderMap return /// the filename it is mapped to, otherwise return an empty StringRef. StringRef lookupFilename(StringRef Filename, SmallVectorImpl &DestPath) const; - /// getFileName - Return the filename of the headermap. + /// Return the filename of the headermap. const char *getFileName() const; - /// dump - Print the contents of this headermap to stderr. + /// Print the contents of this headermap to stderr. void dump() const; private: @@ -68,6 +56,31 @@ private: const char *getString(unsigned StrTabIdx) const; }; +/// This class represents an Apple concept known as a 'header map'. To the +/// \#include file resolution process, it basically acts like a directory of +/// symlinks to files. Its advantages are that it is dense and more efficient +/// to create and process than a directory of symlinks. +class HeaderMap : private HeaderMapImpl { + HeaderMap(std::unique_ptr File, bool BSwap) + : HeaderMapImpl(std::move(File), BSwap) {} + +public: + /// This attempts to load the specified file as a header map. 
If it doesn't + /// look like a HeaderMap, it gives up and returns null. + static const HeaderMap *Create(const FileEntry *FE, FileManager &FM); + + /// Check to see if the specified relative filename is located in this + /// HeaderMap. If so, open it and return its FileEntry. If RawPath is not + /// NULL and the file is found, RawPath will be set to the raw path at which + /// the file was found in the file system. For example, for a search path + /// ".." and a filename "../file.h" this would be "../../file.h". + const FileEntry *LookupFile(StringRef Filename, FileManager &FM) const; + + using HeaderMapImpl::lookupFilename; + using HeaderMapImpl::getFileName; + using HeaderMapImpl::dump; +}; + } // end namespace clang. #endif diff --git a/clang/include/clang/Lex/HeaderMapTypes.h b/clang/include/clang/Lex/HeaderMapTypes.h new file mode 100644 index 0000000..fbaf4ba --- /dev/null +++ b/clang/include/clang/Lex/HeaderMapTypes.h @@ -0,0 +1,43 @@ +//===- HeaderMapTypes.h - Types for the header map format -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_HEADERMAPTYPES_H +#define LLVM_CLANG_LEX_HEADERMAPTYPES_H + +#include <cstdint> + +namespace clang { + +enum { + HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', + HMAP_HeaderVersion = 1, + HMAP_EmptyBucketKey = 0 +}; + +struct HMapBucket { + uint32_t Key; // Offset (into strings) of key. + uint32_t Prefix; // Offset (into strings) of value prefix. + uint32_t Suffix; // Offset (into strings) of value suffix. +}; + +struct HMapHeader { + uint32_t Magic; // Magic word, also indicates byte order. + uint16_t Version; // Version number -- currently 1. + uint16_t Reserved; // Reserved for future use - zero for now.
+ uint32_t StringsOffset; // Offset to start of string pool. + uint32_t NumEntries; // Number of entries in the string table. + uint32_t NumBuckets; // Number of buckets (always a power of 2). + uint32_t MaxValueLength; // Length of longest result path (excluding nul). + // An array of 'NumBuckets' HMapBucket objects follows this header. + // Strings follow the buckets, at StringsOffset. +}; + +} // end namespace clang. + +#endif diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp index 0735d38..26a179c 100644 --- a/clang/lib/Lex/HeaderMap.cpp +++ b/clang/lib/Lex/HeaderMap.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/HeaderMap.h" +#include "clang/Lex/HeaderMapTypes.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/FileManager.h" #include "llvm/ADT/SmallString.h" @@ -22,38 +23,6 @@ #include using namespace clang; -//===----------------------------------------------------------------------===// -// Data Structures and Manifest Constants -//===----------------------------------------------------------------------===// - -enum { - HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', - HMAP_HeaderVersion = 1, - - HMAP_EmptyBucketKey = 0 -}; - -namespace clang { -struct HMapBucket { - uint32_t Key; // Offset (into strings) of key. - - uint32_t Prefix; // Offset (into strings) of value prefix. - uint32_t Suffix; // Offset (into strings) of value suffix. -}; - -struct HMapHeader { - uint32_t Magic; // Magic word, also indicates byte order. - uint16_t Version; // Version number -- currently 1. - uint16_t Reserved; // Reserved for future use - zero for now. - uint32_t StringsOffset; // Offset to start of string pool. - uint32_t NumEntries; // Number of entries in the string table. - uint32_t NumBuckets; // Number of buckets (always a power of 2). - uint32_t MaxValueLength; // Length of longest result path (excluding nul). 
- // An array of 'NumBuckets' HMapBucket objects follows this header. - // Strings follow the buckets, at StringsOffset. -}; -} // end namespace clang. - /// HashHMapKey - This is the 'well known' hash function required by the file /// format, used to look up keys in the hash table. The hash table uses simple /// linear probing based on this function. @@ -82,15 +51,25 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) { if (FileSize <= sizeof(HMapHeader)) return nullptr; auto FileBuffer = FM.getBufferForFile(FE); - if (!FileBuffer) return nullptr; // Unreadable file? - const char *FileStart = (*FileBuffer)->getBufferStart(); + if (!FileBuffer || !*FileBuffer) + return nullptr; + bool NeedsByteSwap; + if (!checkHeader(**FileBuffer, NeedsByteSwap)) + return nullptr; + return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap); +} + +bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File, + bool &NeedsByteSwap) { + if (File.getBufferSize() <= sizeof(HMapHeader)) + return false; + const char *FileStart = File.getBufferStart(); // We know the file is at least as big as the header, check it now. const HMapHeader *Header = reinterpret_cast(FileStart); // Sniff it to see if it's a headermap by checking the magic number and // version. - bool NeedsByteSwap; if (Header->Magic == HMAP_HeaderMagicNumber && Header->Version == HMAP_HeaderVersion) NeedsByteSwap = false; @@ -98,12 +77,13 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) { Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion)) NeedsByteSwap = true; // Mixed endianness headermap. else - return nullptr; // Not a header map. + return false; // Not a header map. - if (Header->Reserved != 0) return nullptr; + if (Header->Reserved != 0) + return false; - // Okay, everything looks good, create the header map. - return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap); + // Okay, everything looks good. 
+ return true; } //===----------------------------------------------------------------------===// @@ -112,18 +92,18 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) { /// getFileName - Return the filename of the headermap. -const char *HeaderMap::getFileName() const { +const char *HeaderMapImpl::getFileName() const { return FileBuffer->getBufferIdentifier(); } -unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const { +unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const { if (!NeedsBSwap) return X; return llvm::ByteSwap_32(X); } /// getHeader - Return a reference to the file header, in unbyte-swapped form. /// This method cannot fail. -const HMapHeader &HeaderMap::getHeader() const { +const HMapHeader &HeaderMapImpl::getHeader() const { // We know the file is at least as big as the header. Return it. return *reinterpret_cast(FileBuffer->getBufferStart()); } @@ -131,7 +111,7 @@ const HMapHeader &HeaderMap::getHeader() const { /// getBucket - Return the specified hash table bucket from the header map, /// bswap'ing its fields as appropriate. If the bucket number is not valid, /// this return a bucket with an empty key (0). -HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { +HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const { HMapBucket Result; Result.Key = HMAP_EmptyBucketKey; @@ -155,7 +135,7 @@ HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { /// getString - Look up the specified string in the string table. If the string /// index is not valid, it returns an empty string. -const char *HeaderMap::getString(unsigned StrTabIdx) const { +const char *HeaderMapImpl::getString(unsigned StrTabIdx) const { // Add the start of the string table to the idx. 
StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); @@ -174,7 +154,7 @@ const char *HeaderMap::getString(unsigned StrTabIdx) const { //===----------------------------------------------------------------------===// /// dump - Print the contents of this headermap to stderr. -LLVM_DUMP_METHOD void HeaderMap::dump() const { +LLVM_DUMP_METHOD void HeaderMapImpl::dump() const { const HMapHeader &Hdr = getHeader(); unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); @@ -199,15 +179,15 @@ const FileEntry *HeaderMap::LookupFile( StringRef Filename, FileManager &FM) const { SmallString<1024> Path; - StringRef Dest = lookupFilename(Filename, Path); + StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path); if (Dest.empty()) return nullptr; return FM.getFile(Dest); } -StringRef HeaderMap::lookupFilename(StringRef Filename, - SmallVectorImpl &DestPath) const { +StringRef HeaderMapImpl::lookupFilename(StringRef Filename, + SmallVectorImpl &DestPath) const { const HMapHeader &Hdr = getHeader(); unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt index 461e0d9..ef0f06c 100644 --- a/clang/unittests/Lex/CMakeLists.txt +++ b/clang/unittests/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(LexTests + HeaderMapTest.cpp LexerTest.cpp PPCallbacksTest.cpp PPConditionalDirectiveRecordTest.cpp diff --git a/clang/unittests/Lex/HeaderMapTest.cpp b/clang/unittests/Lex/HeaderMapTest.cpp new file mode 100644 index 0000000..726e89c2 --- /dev/null +++ b/clang/unittests/Lex/HeaderMapTest.cpp @@ -0,0 +1,94 @@ +//===- unittests/Lex/HeaderMapTest.cpp - HeaderMap tests ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===--------------------------------------------------------------===// + +#include "clang/Lex/HeaderMap.h" +#include "clang/Lex/HeaderMapTypes.h" +#include "llvm/Support/SwapByteOrder.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace llvm; + +namespace { + +// Lay out a header file for testing. +template <unsigned NumBuckets, unsigned NumBytes> struct MapFile { + HMapHeader Header; + HMapBucket Buckets[NumBuckets]; + unsigned char Bytes[NumBytes]; + + void init() { + memset(this, 0, sizeof(MapFile)); + Header.Magic = HMAP_HeaderMagicNumber; + Header.Version = HMAP_HeaderVersion; + Header.NumBuckets = NumBuckets; + Header.StringsOffset = sizeof(Header) + sizeof(Buckets); + } + + void swapBytes() { + using llvm::sys::getSwappedBytes; + Header.Magic = getSwappedBytes(Header.Magic); + Header.Version = getSwappedBytes(Header.Version); + Header.NumBuckets = getSwappedBytes(Header.NumBuckets); + Header.StringsOffset = getSwappedBytes(Header.StringsOffset); + } + + std::unique_ptr<const MemoryBuffer> getBuffer() const { + return MemoryBuffer::getMemBuffer( + StringRef(reinterpret_cast<const char *>(this), sizeof(MapFile)), + "header", + /* RequiresNullTerminator */ false); + } +}; + +TEST(HeaderMapTest, checkHeaderEmpty) { + bool NeedsSwap; + ASSERT_FALSE(HeaderMapImpl::checkHeader( + *MemoryBuffer::getMemBufferCopy("", "empty"), NeedsSwap)); + ASSERT_FALSE(HeaderMapImpl::checkHeader( + *MemoryBuffer::getMemBufferCopy("", "empty"), NeedsSwap)); +} + +TEST(HeaderMapTest, checkHeaderMagic) { + MapFile<1, 1> File; + File.init(); + File.Header.Magic = 0; + bool NeedsSwap; + ASSERT_FALSE(HeaderMapImpl::checkHeader(*File.getBuffer(), NeedsSwap)); +} + +TEST(HeaderMapTest, checkHeaderReserved) { + MapFile<1, 1> File; + File.init(); + File.Header.Reserved = 1; + bool NeedsSwap; + ASSERT_FALSE(HeaderMapImpl::checkHeader(*File.getBuffer(), NeedsSwap)); +} + +TEST(HeaderMapTest, checkHeaderVersion) { + MapFile<1, 1> File; + File.init(); + ++File.Header.Version; + bool NeedsSwap; +
ASSERT_FALSE(HeaderMapImpl::checkHeader(*File.getBuffer(), NeedsSwap)); +} + +TEST(HeaderMapTest, checkHeaderValidButEmpty) { + MapFile<1, 1> File; + File.init(); + bool NeedsSwap; + ASSERT_TRUE(HeaderMapImpl::checkHeader(*File.getBuffer(), NeedsSwap)); + ASSERT_FALSE(NeedsSwap); + + File.swapBytes(); + ASSERT_TRUE(HeaderMapImpl::checkHeader(*File.getBuffer(), NeedsSwap)); + ASSERT_TRUE(NeedsSwap); +} + +} // end namespace -- 2.7.4