From: Reid Kleckner Date: Tue, 11 Sep 2018 20:22:39 +0000 (+0000) Subject: [Support] Avoid calling CommandLineToArgvW from shell32.dll X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f6968d886a4f2875d15c51d4f86754ebb1084936;p=platform%2Fupstream%2Fllvm.git [Support] Avoid calling CommandLineToArgvW from shell32.dll Summary: Shell32.dll depends on gdi32.dll and user32.dll, which are mostly DLLs for Windows GUI functionality. LLVM's utilities don't typically need GUI functionality, and loading these DLLs seems to be slowing down startup. Also, we already have an implementation of Windows command line tokenization in cl::TokenizeWindowsCommandLine, so we can just use it. The goal is to get the original argv in UTF-8, so that it can pass through most LLVM string APIs. A Windows process starts life with a UTF-16 string for its command line, and it can be retrieved with GetCommandLineW from kernel32.dll. Previously, we would: 1. Get the wide command line 2. Call CommandLineToArgvW to handle quoting rules and separate it into arguments. 3. For each wide argument, expand wildcards (* and ?) using FindFirstFileW. 4. Convert each argument to UTF-8 Now we: 1. Get the wide command line, convert the whole thing to UTF-8 2. Tokenize the UTF-8 command line with cl::TokenizeWindowsCommandLine 3. 
For each argument, expand wildcards if present - This requires converting back to UTF-16 to call FindFirstFileW - Results of FindFirstFileW must be converted back to UTF-8 Reviewers: zturner Subscribers: hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D51941 llvm-svn: 341988 --- diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index 668d0b7..ce646d6 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/WindowsError.h" #include @@ -140,73 +142,59 @@ Optional Process::GetEnv(StringRef Name) { return std::string(Res.data()); } -static const char *AllocateString(const SmallVectorImpl &S, - BumpPtrAllocator &Alloc) { - char *Buf = reinterpret_cast(Alloc.Allocate(S.size() + 1, 1)); - ::memcpy(Buf, S.data(), S.size()); - Buf[S.size()] = '\0'; - return Buf; -} - -/// Convert Arg from UTF-16 to UTF-8 and push it onto Args. -static std::error_code ConvertAndPushArg(const wchar_t *Arg, - SmallVectorImpl &Args, - BumpPtrAllocator &Alloc) { - SmallVector ArgString; - if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString)) - return ec; - Args.push_back(AllocateString(ArgString, Alloc)); - return std::error_code(); -} - -/// Perform wildcard expansion of Arg, or just push it into Args if it -/// doesn't have wildcards or doesn't match any files. -static std::error_code WildcardExpand(const wchar_t *Arg, +/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't +/// have wildcards or doesn't match any files. 
+static std::error_code WildcardExpand(StringRef Arg, SmallVectorImpl &Args, - BumpPtrAllocator &Alloc) { - if (!wcspbrk(Arg, L"*?")) { - // Arg does not contain any wildcard characters. This is the common case. - return ConvertAndPushArg(Arg, Args, Alloc); - } + StringSaver &Saver) { + std::error_code EC; - if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) { - // Don't wildcard expand /?. Always treat it as an option. - return ConvertAndPushArg(Arg, Args, Alloc); + // Don't expand Arg if it does not contain any wildcard characters. This is + // the common case. Also don't wildcard expand /?. Always treat it as an + // option. + if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" || + Arg == "-?") { + Args.push_back(Arg.data()); + return EC; } - // Extract any directory part of the argument. - SmallVector Dir; - if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir)) - return ec; - sys::path::remove_filename(Dir); - const int DirSize = Dir.size(); + // Convert back to UTF-16 so we can call FindFirstFileW. + SmallVector ArgW; + EC = windows::UTF8ToUTF16(Arg, ArgW); + if (EC) + return EC; // Search for matching files. // FIXME: This assumes the wildcard is only in the file name and not in the // directory portion of the file path. For example, it doesn't handle // "*\foo.c" nor "s?c\bar.cpp". WIN32_FIND_DATAW FileData; - HANDLE FindHandle = FindFirstFileW(Arg, &FileData); + HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData); if (FindHandle == INVALID_HANDLE_VALUE) { - return ConvertAndPushArg(Arg, Args, Alloc); + Args.push_back(Arg.data()); + return EC; } - std::error_code ec; + // Extract any directory part of the argument. 
+ SmallString Dir = Arg; + sys::path::remove_filename(Dir); + const int DirSize = Dir.size(); + do { - SmallVector FileName; - ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName), + SmallString FileName; + EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName), FileName); - if (ec) + if (EC) break; // Append FileName to Dir, and remove it afterwards. - llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size())); - Args.push_back(AllocateString(Dir, Alloc)); + llvm::sys::path::append(Dir, FileName); + Args.push_back(Saver.save(StringRef(Dir)).data()); Dir.resize(DirSize); } while (FindNextFileW(FindHandle, &FileData)); FindClose(FindHandle); - return ec; + return EC; } static std::error_code GetExecutableName(SmallVectorImpl &Filename) { @@ -243,18 +231,20 @@ static std::error_code GetExecutableName(SmallVectorImpl &Filename) { std::error_code windows::GetCommandLineArguments(SmallVectorImpl &Args, BumpPtrAllocator &Alloc) { - int ArgCount; - std::unique_ptr UnicodeCommandLine{ - CommandLineToArgvW(GetCommandLineW(), &ArgCount), &LocalFree}; - if (!UnicodeCommandLine) - return mapWindowsError(::GetLastError()); - + const wchar_t *CmdW = GetCommandLineW(); + assert(CmdW); std::error_code EC; + SmallString Cmd; + EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd); + if (EC) + return EC; - Args.reserve(ArgCount); + SmallVector TmpArgs; + StringSaver Saver(Alloc); + cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false); - for (int I = 0; I < ArgCount; ++I) { - EC = WildcardExpand(UnicodeCommandLine[I], Args, Alloc); + for (const char *Arg : TmpArgs) { + EC = WildcardExpand(Arg, Args, Saver); if (EC) return EC; } @@ -266,7 +256,7 @@ windows::GetCommandLineArguments(SmallVectorImpl &Args, if (EC) return EC; sys::path::append(Arg0, Filename); - Args[0] = AllocateString(Arg0, Alloc); + Args[0] = Saver.save(Arg0).data(); return std::error_code(); }