From 732ae12c9cdd8227ab2882f0949d798c3fcb446d Mon Sep 17 00:00:00 2001 From: Huo Yaoyuan Date: Sat, 4 Mar 2023 07:00:07 +0800 Subject: [PATCH] Implement managed SegmentCommandLine (#82883) * Implement managed version of SegmentCommandLine * Remove P/Invoke for CommandLineToArgv * Update parsing to latest UCRT * Add test and fix trailing space in command * Fix test cases --- .../Windows/Shell32/Interop.CommandLineToArgv.cs | 13 -- .../src/System.Private.CoreLib.Shared.projitems | 5 +- .../src/System/Environment.Windows.cs | 147 +++++++++++++++++++-- .../tests/System/Environment.GetCommandLineArgs.cs | 25 ++++ 4 files changed, 160 insertions(+), 30 deletions(-) delete mode 100644 src/libraries/Common/src/Interop/Windows/Shell32/Interop.CommandLineToArgv.cs diff --git a/src/libraries/Common/src/Interop/Windows/Shell32/Interop.CommandLineToArgv.cs b/src/libraries/Common/src/Interop/Windows/Shell32/Interop.CommandLineToArgv.cs deleted file mode 100644 index ecc73f5..0000000 --- a/src/libraries/Common/src/Interop/Windows/Shell32/Interop.CommandLineToArgv.cs +++ /dev/null @@ -1,13 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Runtime.InteropServices; - -internal static unsafe partial class Interop -{ - internal static partial class Shell32 - { - [LibraryImport(Libraries.Shell32, EntryPoint = "CommandLineToArgvW")] - internal static partial char** CommandLineToArgv(char* lpCommandLine, int* pNumArgs); - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b200e12..f28e4c0 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1938,9 +1938,6 @@ Common\Interop\Windows\Secur32\Interop.GetUserNameExW.cs - - Common\Interop\Windows\Shell32\Interop.CommandLineToArgv.cs - Common\Interop\Windows\Shell32\Interop.SHGetKnownFolderPath.cs @@ -2577,4 +2574,4 @@ - + \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/Environment.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Environment.Windows.cs index 5934ac4..5344f92 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Environment.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Environment.Windows.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using Microsoft.Win32.SafeHandles; +using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; @@ -216,26 +217,146 @@ namespace System char* lpCmdLine = Interop.Kernel32.GetCommandLine(); Debug.Assert(lpCmdLine != null); - int numArgs = 0; - char** argvW = Interop.Shell32.CommandLineToArgv(lpCmdLine, &numArgs); - if (argvW == null) - { - ThrowHelper.ThrowOutOfMemoryException(); - } + return SegmentCommandLine(lpCmdLine); + } - try + private static unsafe string[] SegmentCommandLine(char* cmdLine) + { + // Parse command line arguments using the rules documented at + // https://learn.microsoft.com/cpp/cpp/main-function-command-line-args#parsing-c-command-line-arguments + + // CommandLineToArgvW API cannot be used here since + // it has slightly different behavior. + + ArrayBuilder arrayBuilder = default; + + Span stringBuffer = stackalloc char[260]; // Use MAX_PATH for a typical maximum + scoped ValueStringBuilder stringBuilder; + + char c; + + // First scan the program name, copy it, and count the bytes + + char* p = cmdLine; + + // A quoted program name is handled here. The handling is much + // simpler than for other arguments. Basically, whatever lies + // between the leading double-quote and next one, or a terminal null + // character is simply accepted. Fancier handling is not required + // because the program name must be a legal NTFS/HPFS file name. + // Note that the double-quote characters are not copied, nor do they + // contribyte to character_count. + + bool inQuotes = false; + stringBuilder = new ValueStringBuilder(stringBuffer); + + do { - string[] result = new string[numArgs]; - for (int i = 0; i < result.Length; i++) + if (*p == '"') { - result[i] = new string(*(argvW + i)); + inQuotes = !inQuotes; + c = *p++; + continue; } - return result; + + c = *p++; + stringBuilder.Append(c); } - finally + while (c != '\0' && (inQuotes || (c is not (' ' or '\t')))); + + if (c == '\0') { - Interop.Kernel32.LocalFree((IntPtr)argvW); + p--; } + + stringBuilder.Length--; + arrayBuilder.Add(stringBuilder.ToString()); + inQuotes = false; + + // loop on each argument + while (true) + { + if (*p != '\0') + { + while (*p is ' ' or '\t') + { + ++p; + } + } + + if (*p == '\0') + { + // end of args + break; + } + + // scan an argument + stringBuilder = new ValueStringBuilder(stringBuffer); + + // loop through scanning one argument + while (true) + { + bool copyChar = true; + + // Rules: + // 2N backslashes + " ==> N backslashes and begin/end quote + // 2N+1 backslashes + " ==> N backslashes + literal " + // N backslashes ==> N backslashes + int numSlash = 0; + + while (*p == '\\') + { + // Count number of backslashes for use below + ++p; + ++numSlash; + } + + if (*p == '"') + { + // if 2N backslashes before, start / end quote, otherwise + // copy literally: + if (numSlash % 2 == 0) + { + if (inQuotes && p[1] == '"') + { + p++; // Double quote inside quoted string + } + else + { + // Skip first quote char and copy second: + copyChar = false; // Don't copy quote + inQuotes = !inQuotes; + } + } + + numSlash /= 2; + } + + // Copy slashes: + while (numSlash-- > 0) + { + stringBuilder.Append('\\'); + } + + // If at end of arg, break loop: + if (*p == '\0' || (!inQuotes && *p is ' ' or '\t')) + { + break; + } + + // Copy character into argument: + if (copyChar) + { + stringBuilder.Append(*p); + } + + ++p; + } + + arrayBuilder.Add(stringBuilder.ToString()); + } + + return arrayBuilder.ToArray(); } } } diff --git a/src/libraries/System.Runtime.Extensions/tests/System/Environment.GetCommandLineArgs.cs b/src/libraries/System.Runtime.Extensions/tests/System/Environment.GetCommandLineArgs.cs index dc07a79..759f0e0 100644 --- a/src/libraries/System.Runtime.Extensions/tests/System/Environment.GetCommandLineArgs.cs +++ b/src/libraries/System.Runtime.Extensions/tests/System/Environment.GetCommandLineArgs.cs @@ -103,5 +103,30 @@ namespace System.Tests return RemoteExecutor.SuccessExitCode; } + + public static bool IsWindowsCoreCLRJit + => PlatformDetection.IsWindows + && PlatformDetection.IsNotMonoRuntime + && PlatformDetection.IsNotNativeAot; + + [ConditionalTheory(typeof(GetCommandLineArgs), nameof(IsWindowsCoreCLRJit))] + [InlineData(@"cmd ""abc"" d e", new[] { "cmd", "abc", "d", "e" })] + [InlineData(@"cmd a\\b d""e f""g h", new[] { "cmd", @"a\\b", "de fg", "h" })] + [InlineData(@"cmd a\\\""b c d", new[] { "cmd", @"a\""b", "c", "d" })] + [InlineData(@"cmd a\\\\""b c"" d e", new[] { "cmd", @"a\\b c", "d", "e" })] + [InlineData(@"cmd a""b"""" c d", new[] { "cmd", @"ab"" c d" })] + [InlineData(@"X:\No""t A"""" P""ath arg", new[] { @"X:\Not A Path", "arg" })] + [InlineData(@"""\\Some Server\cmd"" ""arg", new[] { @"\\Some Server\cmd", "arg" })] + public static unsafe void CheckCommandLineParser(string cmdLine, string[] args) + { + var method = typeof(Environment).GetMethod("SegmentCommandLine", BindingFlags.Static | BindingFlags.NonPublic); + Assert.NotNull(method); + + var span = cmdLine.AsSpan(); // Workaround + fixed (char* p = span) + { + Assert.Equal(args, method.Invoke(null, new object[] { Pointer.Box(p, typeof(char*)) })); + } + } } } -- 2.7.4