From 15243d5a6df3e2cd3e3386891a6338388029b256 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Sun, 10 Jun 2018 20:57:14 +0000 Subject: [PATCH] Attempt 3: Resubmit "[Support] Expose flattenWindowsCommandLine." I took some liberties and quoted fewer characters than before, based on an article from MSDN which says that only certain characters cause an arg to require quoting. This seems to be incorrect, though, and worse it seems to be a difference in Windows version. The bot that fails is Windows 7, and I can't reproduce the failure on Win 10. But it's definitely related to quoting and special characters, because both tests that fail have a * in the argument, which is one of the special characters that would cause an argument to be quoted before but not any longer after the new patch. Since I don't have Win 7, all I can do is just guess that I need to restore the old quoting rules. So this patch does that in hopes that it fixes the problem on Windows 7. llvm-svn: 334375 --- llvm/include/llvm/Support/Program.h | 13 ++ llvm/lib/Support/Program.cpp | 9 ++ llvm/lib/Support/Unix/Program.inc | 10 +- llvm/lib/Support/Windows/Path.inc | 2 +- llvm/lib/Support/Windows/Program.inc | 187 +++++++++++---------------- 5 files changed, 99 insertions(+), 122 deletions(-) diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h index 381b0d4902dc..e64bfb2948af 100644 --- a/llvm/include/llvm/Support/Program.h +++ b/llvm/include/llvm/Support/Program.h @@ -130,6 +130,11 @@ namespace sys { std::string *ErrMsg = nullptr, bool *ExecutionFailed = nullptr); + /// Return true if the given arguments fit within system-specific + /// argument length limits. + bool commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef Args); + /// Return true if the given arguments fit within system-specific /// argument length limits. bool commandLineFitsWithinSystemLimits(StringRef Program, @@ -189,6 +194,14 @@ namespace sys { ///< string is non-empty upon return an error occurred while invoking the ///< program. ); + +#if defined(_WIN32) + /// Given a list of command line arguments, quote and escape them as necessary + /// to build a single flat command line appropriate for calling CreateProcess + /// on + /// Windows. + std::string flattenWindowsCommandLine(ArrayRef Args); +#endif } } diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp index 8484b46e522a..8c56fb6db2d5 100644 --- a/llvm/lib/Support/Program.cpp +++ b/llvm/lib/Support/Program.cpp @@ -63,6 +63,15 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, const char **Args, return PI; } +bool sys::commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef Args) { + SmallVector StringRefArgs; + StringRefArgs.reserve(Args.size()); + for (const char *A : Args) + StringRefArgs.emplace_back(A); + return commandLineFitsWithinSystemLimits(Program, StringRefArgs); +} + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Program.inc" diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc index 04840bf44482..be971555b828 100644 --- a/llvm/lib/Support/Unix/Program.inc +++ b/llvm/lib/Support/Unix/Program.inc @@ -434,7 +434,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, } bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, - ArrayRef Args) { + ArrayRef Args) { static long ArgMax = sysconf(_SC_ARG_MAX); // POSIX requires that _POSIX_ARG_MAX is 4096, which is the lowest possible // value for ARG_MAX on a POSIX compliant system. @@ -456,18 +456,16 @@ bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, long HalfArgMax = EffectiveArgMax / 2; size_t ArgLength = Program.size() + 1; - for (const char* Arg : Args) { - size_t length = strlen(Arg); - + for (StringRef Arg : Args) { // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which // does not have a constant unlike what the man pages would have you // believe. Since this limit is pretty high, perform the check // unconditionally rather than trying to be aggressive and limiting it to // Linux only. - if (length >= (32 * 4096)) + if (Arg.size() >= (32 * 4096)) return false; - ArgLength += length + 1; + ArgLength += Arg.size() + 1; if (ArgLength > size_t(HalfArgMax)) { return false; } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 9173206d8166..7ef7468867d6 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -1197,7 +1197,7 @@ Expected openNativeFileForRead(const Twine &Name, OpenFlags Flags, if (Result && RealPath) realPathFromHandle(*Result, *RealPath); - return std::move(Result); + return Result; } void closeFile(file_t &F) { diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc index 183b66ce2c03..19a38c78432b 100644 --- a/llvm/lib/Support/Windows/Program.inc +++ b/llvm/lib/Support/Windows/Program.inc @@ -23,6 +23,7 @@ #include #include #include +#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code @@ -31,7 +32,7 @@ namespace llvm { -ProcessInfo::ProcessInfo() : Process(0), Pid(0), ReturnCode(0) {} +ProcessInfo::ProcessInfo() : Pid(0), Process(0), ReturnCode(0) {} ErrorOr sys::findProgramByName(StringRef Name, ArrayRef Paths) { @@ -146,108 +147,13 @@ static HANDLE RedirectIO(Optional Path, int fd, return h; } -/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling -/// CreateProcess. -static bool ArgNeedsQuotes(const char *Str) { - return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0; } -/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur -/// in the C string Start. -static unsigned int CountPrecedingBackslashes(const char *Start, - const char *Cur) { - unsigned int Count = 0; - --Cur; - while (Cur >= Start && *Cur == '\\') { - ++Count; - --Cur; - } - return Count; -} - -/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash -/// preceding Cur in the Start string. Assumes Dst has enough space. -static char *EscapePrecedingEscapes(char *Dst, const char *Start, - const char *Cur) { - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur); - while (PrecedingEscapes > 0) { - *Dst++ = '\\'; - --PrecedingEscapes; - } - return Dst; -} - -/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling -/// CreateProcess and returns length of quoted arg with escaped quotes -static unsigned int ArgLenWithQuotes(const char *Str) { - const char *Start = Str; - bool Quoted = ArgNeedsQuotes(Str); - unsigned int len = Quoted ? 2 : 0; - - while (*Str != '\0') { - if (*Str == '\"') { - // We need to add a backslash, but ensure that it isn't escaped. - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); - len += PrecedingEscapes + 1; - } - // Note that we *don't* need to escape runs of backslashes that don't - // precede a double quote! See MSDN: - // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx - - ++len; - ++Str; - } - - if (Quoted) { - // Make sure the closing quote doesn't get escaped by a trailing backslash. - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); - len += PrecedingEscapes + 1; - } - - return len; -} - -} - -static std::unique_ptr flattenArgs(const char **Args) { - // First, determine the length of the command line. - unsigned len = 0; - for (unsigned i = 0; Args[i]; i++) { - len += ArgLenWithQuotes(Args[i]) + 1; - } - - // Now build the command line. - std::unique_ptr command(new char[len+1]); - char *p = command.get(); - - for (unsigned i = 0; Args[i]; i++) { - const char *arg = Args[i]; - const char *start = arg; - - bool needsQuoting = ArgNeedsQuotes(arg); - if (needsQuoting) - *p++ = '"'; - - while (*arg != '\0') { - if (*arg == '\"') { - // Escape all preceding escapes (if any), and then escape the quote. - p = EscapePrecedingEscapes(p, start, arg); - *p++ = '\\'; - } - - *p++ = *arg++; - } - - if (needsQuoting) { - // Make sure our quote doesn't get escaped by a trailing backslash. - p = EscapePrecedingEscapes(p, start, arg); - *p++ = '"'; - } - *p++ = ' '; - } - - *p = 0; - return command; +static SmallVector buildArgVector(const char **Args) { + SmallVector Result; + for (unsigned I = 0; Args[I]; ++I) + Result.push_back(StringRef(Args[I])); + return Result; } static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, @@ -270,7 +176,8 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, // Windows wants a command line, not an array of args, to pass to the new // process. We have to concatenate them all, while quoting the args that // have embedded spaces (or are empty). - std::unique_ptr command = flattenArgs(Args); + auto ArgVector = buildArgVector(Args); + std::string Command = flattenWindowsCommandLine(ArgVector); // The pointer to the environment block for the new process. std::vector EnvBlock; @@ -353,7 +260,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, } SmallVector CommandUtf16; - if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) { SetLastError(ec.value()); MakeErrMsg(ErrMsg, std::string("Unable to convert command-line to UTF-16")); @@ -414,7 +321,63 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, return true; } +static bool argNeedsQuotes(StringRef Arg) { + if (Arg.empty()) + return true; + return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|"); +} + +static std::string quoteSingleArg(StringRef Arg) { + std::string Result; + Result.push_back('"'); + + while (!Arg.empty()) { + size_t FirstNonBackslash = Arg.find_first_not_of('\\'); + size_t BackslashCount = FirstNonBackslash; + if (FirstNonBackslash == StringRef::npos) { + // The entire remainder of the argument is backslashes. Escape all of + // them and just early out. + BackslashCount = Arg.size(); + Result.append(BackslashCount * 2, '\\'); + break; + } + + if (Arg[FirstNonBackslash] == '\"') { + // This is an embedded quote. Escape all preceding backslashes, then + // add one additional backslash to escape the quote. + Result.append(BackslashCount * 2 + 1, '\\'); + Result.push_back('\"'); + } else { + // This is just a normal character. Don't escape any of the preceding + // backslashes, just append them as they are and then append the + // character. + Result.append(BackslashCount, '\\'); + Result.push_back(Arg[FirstNonBackslash]); + } + + // Drop all the backslashes, plus the following character. + Arg = Arg.drop_front(FirstNonBackslash + 1); + } + + Result.push_back('"'); + return Result; +} + namespace llvm { +std::string sys::flattenWindowsCommandLine(ArrayRef Args) { + std::string Command; + for (StringRef Arg : Args) { + if (argNeedsQuotes(Arg)) + Command += quoteSingleArg(Arg); + else + Command += Arg; + + Command.push_back(' '); + } + + return Command; +} + ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilChildTerminates, std::string *ErrMsg) { assert(PI.Pid && "invalid pid to wait on, process not started?"); @@ -537,19 +500,13 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, } bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, - ArrayRef Args) { + ArrayRef Args) { // The documented max length of the command line passed to CreateProcess. static const size_t MaxCommandStringLength = 32768; - // Account for the trailing space for the program path and the - // trailing NULL of the last argument. - size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2; - for (const char* Arg : Args) { - // Account for the trailing space for every arg - ArgLength += ArgLenWithQuotes(Arg) + 1; - if (ArgLength > MaxCommandStringLength) { - return false; - } - } - return true; + SmallVector FullArgs; + FullArgs.push_back(Program); + FullArgs.append(Args.begin(), Args.end()); + std::string Result = flattenWindowsCommandLine(FullArgs); + return (Result.size() + 1) <= MaxCommandStringLength; } } -- 2.34.1