Fix ZipArchiveEntry names shown as corrupted on other zip programs (#65886)
author David Cantú <dacantu@microsoft.com>
Thu, 3 Mar 2022 01:59:58 +0000 (17:59 -0800)
committer GitHub <noreply@github.com>
Thu, 3 Mar 2022 01:59:58 +0000 (17:59 -0800)
* Fix ZipArchiveEntry names shown as corrupted on other zip programs

* Add test

src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs
src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs
src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs
src/libraries/System.IO.Compression/tests/ZipArchive/zip_CreateTests.cs

index b5937a9..9308fe1 100644 (file)
@@ -388,6 +388,7 @@ namespace System.IO.Compression.Tests
         protected const string Utf8LowerCaseOUmlautChar = "\u00F6";
         protected const string Utf8CopyrightChar = "\u00A9";
         protected const string AsciiFileName = "file.txt";
+        protected const string UnicodeFileName = "\u4f60\u597D.txt";
         // The o with umlaut is a character that exists in both latin1 and utf8
         protected const string Utf8AndLatin1FileName = $"{Utf8LowerCaseOUmlautChar}.txt";
         // emojis only make sense in utf8
index 7bf3271..0e1fb42 100644 (file)
@@ -42,7 +42,6 @@ namespace System.IO.Compression
         private List<ZipGenericExtraField>? _lhUnknownExtraFields;
         private byte[] _fileComment;
         private readonly CompressionLevel? _compressionLevel;
-        private bool _hasUnicodeEntryNameOrComment;
 
         // Initializes a ZipArchiveEntry instance for an existing archive entry.
         internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
@@ -84,8 +83,6 @@ namespace System.IO.Compression
             _fileComment = cd.FileComment;
 
             _compressionLevel = null;
-
-            _hasUnicodeEntryNameOrComment = (_generalPurposeBitFlag & BitFlagValues.UnicodeFileNameAndComment) != 0;
         }
 
         // Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level.
@@ -144,8 +141,6 @@ namespace System.IO.Compression
             {
                 _archive.AcquireArchiveStream(this);
             }
-
-            _hasUnicodeEntryNameOrComment = false;
         }
 
         /// <summary>
@@ -197,7 +192,11 @@ namespace System.IO.Compression
             set
             {
                 _fileComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, _archive.EntryNameAndCommentEncoding, ushort.MaxValue, out bool isUTF8);
-                _hasUnicodeEntryNameOrComment |= isUTF8;
+
+                if (isUTF8)
+                {
+                    _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment;
+                }
             }
         }
 
@@ -218,11 +217,19 @@ namespace System.IO.Compression
                 ArgumentNullException.ThrowIfNull(value, nameof(FullName));
 
                 _storedEntryNameBytes = ZipHelper.GetEncodedTruncatedBytesFromString(
-                    value, _archive.EntryNameAndCommentEncoding, 0 /* No truncation */, out bool hasUnicodeEntryName);
+                    value, _archive.EntryNameAndCommentEncoding, 0 /* No truncation */, out bool isUTF8);
 
-                _hasUnicodeEntryNameOrComment |= hasUnicodeEntryName;
                 _storedEntryName = value;
 
+                if (isUTF8)
+                {
+                    _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment;
+                }
+                else
+                {
+                    _generalPurposeBitFlag &= ~BitFlagValues.UnicodeFileNameAndComment;
+                }
+
                 DetectEntryNameVersion();
             }
         }
@@ -505,11 +512,6 @@ namespace System.IO.Compression
                 extraFieldLength = (ushort)bigExtraFieldLength;
             }
 
-            if (_hasUnicodeEntryNameOrComment)
-                _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment;
-            else
-                _generalPurposeBitFlag &= ~BitFlagValues.UnicodeFileNameAndComment;
-
             writer.Write(ZipCentralDirectoryFileHeader.SignatureConstant);      // Central directory file header signature  (4 bytes)
             writer.Write((byte)_versionMadeBySpecification);                    // Version made by Specification (version)  (1 byte)
             writer.Write((byte)CurrentZipPlatform);                             // Version made by Compatibility (type)     (1 byte)
index 64fffd2..f85b177 100644 (file)
@@ -554,7 +554,7 @@ namespace System.IO.Compression
         public uint OffsetOfStartOfCentralDirectoryWithRespectToTheStartingDiskNumber;
         public byte[] ArchiveComment;
 
-        public static void WriteBlock(Stream stream, long numberOfEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, byte[]? archiveComment)
+        public static void WriteBlock(Stream stream, long numberOfEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, byte[] archiveComment)
         {
             BinaryWriter writer = new BinaryWriter(stream);
 
@@ -574,10 +574,10 @@ namespace System.IO.Compression
             writer.Write(startOfCentralDirectoryTruncated);
 
             // Should be valid because of how we read archiveComment in TryReadBlock:
-            Debug.Assert((archiveComment == null) || (archiveComment.Length <= ZipFileCommentMaxLength));
+            Debug.Assert(archiveComment.Length <= ZipFileCommentMaxLength);
 
-            writer.Write(archiveComment != null ? (ushort)archiveComment.Length : (ushort)0); // zip file comment length
-            if (archiveComment != null)
+            writer.Write((ushort)archiveComment.Length); // zip file comment length
+            if (archiveComment.Length > 0)
                 writer.Write(archiveComment);
         }
 
index a964f5d..bf84eb4 100644 (file)
@@ -176,6 +176,21 @@ namespace System.IO.Compression.Tests
             AssertDataDescriptor(memoryStream, false);
         }
 
+        [Theory]
+        [InlineData(UnicodeFileName, UnicodeFileName, true)]
+        [InlineData(UnicodeFileName, AsciiFileName, true)]
+        [InlineData(AsciiFileName, UnicodeFileName, true)]
+        [InlineData(AsciiFileName, AsciiFileName, false)]
+        public static void CreateNormal_VerifyUnicodeFileNameAndComment(string fileName, string entryComment, bool isUnicodeFlagExpected)
+        {
+            using var ms = new MemoryStream();
+            using var archive = new ZipArchive(ms, ZipArchiveMode.Create);
+
+            CreateEntry(archive, fileName, fileContents: "xxx", entryComment);
+
+            AssertUnicodeFileNameAndComment(ms, isUnicodeFlagExpected);
+        }
+
         [Fact]
         public static void CreateNormal_With2SameEntries_ThrowException()
         {
@@ -198,11 +213,12 @@ namespace System.IO.Compression.Tests
             return Text.Encoding.UTF8.GetString(input.ToArray());
         }
 
-        private static void CreateEntry(ZipArchive archive, string fileName, string fileContents)
+        private static void CreateEntry(ZipArchive archive, string fileName, string fileContents, string entryComment = null)
         {
             ZipArchiveEntry entry = archive.CreateEntry(fileName);
             using StreamWriter writer = new StreamWriter(entry.Open());
             writer.Write(fileContents);
+            entry.Comment = entryComment;
         }
 
         private static void AssertDataDescriptor(MemoryStream memoryStream, bool hasDataDescriptor)
@@ -211,5 +227,12 @@ namespace System.IO.Compression.Tests
             Assert.Equal(hasDataDescriptor ? 8 : 0, fileBytes[6]);
             Assert.Equal(0, fileBytes[7]);
         }
+
+        private static void AssertUnicodeFileNameAndComment(MemoryStream memoryStream, bool isUnicodeFlagExpected)
+        {
+            byte[] fileBytes = memoryStream.ToArray();
+            Assert.Equal(0, fileBytes[6]);
+            Assert.Equal(isUnicodeFlagExpected ? 8 : 0, fileBytes[7]);
+        }
     }
 }