Move EncodingTable and CodePageDataItem to System.Text namespace (#17061)
[platform/upstream/coreclr.git] / src / mscorlib / src / System / Text / EncodingTable.Unix.cs
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 using System.Collections.Generic;
6 using System.Diagnostics;
7 using System.Globalization;
8 using System.Text;
9
10 namespace System.Text
11 {
12     internal static class EncodingTable
13     {
14         // Return a list of all EncodingInfo objects describing all of our encodings
15         internal static EncodingInfo[] GetEncodings()
16         {
17             EncodingInfo[] arrayEncodingInfo = new EncodingInfo[s_encodingDataTableItems.Length];
18
19             for (int i = 0; i < s_encodingDataTableItems.Length; i++)
20             {
21                 CodePageDataItem dataItem = s_encodingDataTableItems[i];
22
23                 arrayEncodingInfo[i] = new EncodingInfo(dataItem.CodePage, dataItem.WebName,
24                     SR.GetResourceString(dataItem.DisplayNameResourceKey));
25             }
26
27             return arrayEncodingInfo;
28         }
29
30         internal static int GetCodePageFromName(string name)
31         {
32             if (name == null)
33             {
34                 throw new ArgumentNullException(nameof(name));
35             }
36
37             ushort codePage;
38             if (s_encodingDataTable.TryGetValue(name, out codePage))
39             {
40                 return codePage;
41             }
42
43             // The encoding name is not valid.
44             throw new ArgumentException(
45                 string.Format(
46                     CultureInfo.CurrentCulture,
47                     SR.Argument_EncodingNotSupported, name), nameof(name));
48         }
49
50         internal static CodePageDataItem GetCodePageDataItem(int codepage)
51         {
52             CodePageDataItem item;
53
54             switch (codepage)
55             {
56                 case 1200:
57                     item = s_encodingDataTableItems[0];
58                     break;
59                 case 1201:
60                     item = s_encodingDataTableItems[1];
61                     break;
62                 case 12000:
63                     item = s_encodingDataTableItems[2];
64                     break;
65                 case 12001:
66                     item = s_encodingDataTableItems[3];
67                     break;
68                 case 20127:
69                     item = s_encodingDataTableItems[4];
70                     break;
71                 case 28591:
72                     item = s_encodingDataTableItems[5];
73                     break;
74                 case 65000:
75                     item = s_encodingDataTableItems[6];
76                     break;
77                 case 65001:
78                     item = s_encodingDataTableItems[7];
79                     break;
80                 default:
81                     item = null;
82                     break;
83             }
84
85             Debug.Assert(item == null || item.CodePage == codepage, "item.CodePage needs to equal the specified codepage");
86             return item;
87         }
88
89         // PAL ends here.
90
#if DEBUG
        // Debug-only check that the EncodingTableCapacity constant equals the number of
        // entries actually placed in s_encodingDataTable.
        static EncodingTable()
        {
            int populatedEntries = s_encodingDataTable.Count;

            string mismatchMessage = string.Format(
                CultureInfo.InvariantCulture,
                "EncodingTable s_encodingDataTable's initial capacity (EncodingTableCapacity) is incorrect.{0}Expected (s_encodingDataTable.Count): {1}, Actual (EncodingTableCapacity): {2}",
                Environment.NewLine,
                populatedEntries,
                EncodingTableCapacity);

            Debug.Assert(populatedEntries == EncodingTableCapacity, mismatchMessage);
        }
#endif
103
104         // NOTE: the following two lists were taken from ~\src\classlibnative\nls\encodingdata.cpp
105         // and should be kept in sync with those lists
106
107         private const int EncodingTableCapacity = 42;
108         private readonly static Dictionary<string, ushort> s_encodingDataTable =
109             new Dictionary<string, ushort>(EncodingTableCapacity, StringComparer.OrdinalIgnoreCase)
110         {
111             { "ANSI_X3.4-1968", 20127 },
112             { "ANSI_X3.4-1986", 20127 },
113             { "ascii", 20127 },
114             { "cp367", 20127 },
115             { "cp819", 28591 },
116             { "csASCII", 20127 },
117             { "csISOLatin1", 28591 },
118             { "csUnicode11UTF7", 65000 },
119             { "IBM367", 20127 },
120             { "ibm819", 28591 },
121             { "ISO-10646-UCS-2", 1200 },
122             { "iso-8859-1", 28591 },
123             { "iso-ir-100", 28591 },
124             { "iso-ir-6", 20127 },
125             { "ISO646-US", 20127 },
126             { "iso8859-1", 28591 },
127             { "ISO_646.irv:1991", 20127 },
128             { "iso_8859-1", 28591 },
129             { "iso_8859-1:1987", 28591 },
130             { "l1", 28591 },
131             { "latin1", 28591 },
132             { "ucs-2", 1200 },
133             { "unicode", 1200},
134             { "unicode-1-1-utf-7", 65000 },
135             { "unicode-1-1-utf-8", 65001 },
136             { "unicode-2-0-utf-7", 65000 },
137             { "unicode-2-0-utf-8", 65001 },
138             // People get confused about the FFFE here.  We can't change this because it'd break existing apps.
139             // This has been this way for a long time, including in Mlang.
140             // Big Endian, BOM seems backwards, think of the BOM in little endian order.
141             { "unicodeFFFE", 1201},
142             { "us", 20127 },
143             { "us-ascii", 20127 },
144             { "utf-16", 1200 },
145             { "UTF-16BE", 1201},
146             { "UTF-16LE", 1200},
147             { "utf-32", 12000 },
148             { "UTF-32BE", 12001 },
149             { "UTF-32LE", 12000 },
150             { "utf-7", 65000 },
151             { "utf-8", 65001 },
152             { "x-unicode-1-1-utf-7", 65000 },
153             { "x-unicode-1-1-utf-8", 65001 },
154             { "x-unicode-2-0-utf-7", 65000 },
155             { "x-unicode-2-0-utf-8", 65001 },
156         };
157
158         // redeclaring these constants here for readability below
159         private const uint MIMECONTF_MAILNEWS = Encoding.MIMECONTF_MAILNEWS;
160         private const uint MIMECONTF_BROWSER = Encoding.MIMECONTF_BROWSER;
161         private const uint MIMECONTF_SAVABLE_MAILNEWS = Encoding.MIMECONTF_SAVABLE_MAILNEWS;
162         private const uint MIMECONTF_SAVABLE_BROWSER = Encoding.MIMECONTF_SAVABLE_BROWSER;
163
164         // keep this array sorted by code page, so the order is consistent for GetEncodings()
165         // Remember to update GetCodePageDataItem() if this list is updated
166         private readonly static CodePageDataItem[] s_encodingDataTableItems = new[]
167         {
168             new CodePageDataItem(1200, 1200, "utf-16", MIMECONTF_SAVABLE_BROWSER), // "Unicode"
169             new CodePageDataItem(1201, 1200, "utf-16BE", 0), // Big Endian, old FFFE BOM seems backwards, think of the BOM in little endian order.
170             new CodePageDataItem(12000, 1200, "utf-32", 0), // "Unicode (UTF-32)"
171             new CodePageDataItem(12001, 1200, "utf-32BE", 0), // "Unicode (UTF-32 Big Endian)"
172             new CodePageDataItem(20127, 1252, "us-ascii", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "US-ASCII"
173             new CodePageDataItem(28591, 1252, "iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (ISO)"
174             new CodePageDataItem(65000, 1200, "utf-7", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Unicode (UTF-7)"
175             new CodePageDataItem(65001, 1200, "utf-8", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Unicode (UTF-8)"
176         };
177     }
178 }