1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
7 using System.Diagnostics;
8 using System.Runtime.InteropServices;
12 // A Decoder is used to decode a sequence of blocks of bytes into a
13 // sequence of blocks of characters. Following instantiation of a decoder,
14 // sequential blocks of bytes are converted into blocks of characters through
15 // calls to the GetChars method. The decoder maintains state between the
16 // conversions, allowing it to correctly decode byte sequences that span
19 // Instances of specific implementations of the Decoder abstract base
20 // class are typically obtained through calls to the GetDecoder method
21 // of Encoding objects.
23 public abstract class Decoder
// Fallback used by this decoder. The FallbackBuffer property creates its buffer from this
// object when it is non-null; DecoderFallback.ReplacementFallback is used otherwise.
25 internal DecoderFallback _fallback = null;
// Fallback buffer, created on demand by the FallbackBuffer property. It stays null until
// then, and the Fallback property sets it back to null.
27 internal DecoderFallbackBuffer _fallbackBuffer = null;
31 // We don't call default reset because default reset probably isn't good if we aren't initialized.
// Property exposing the DecoderFallback associated with this decoder.
// NOTE(review): the accessor keywords, braces and several statements are not visible in
// this listing (the embedded line numbers jump from 34 to 44), so the comments below
// describe only the lines that are shown.
34 public DecoderFallback Fallback
// Reports the incoming value by parameter name; presumably guarded by a null check on
// `value` that is not visible here -- confirm.
44 throw new ArgumentNullException(nameof(value));
46 // Can't change the fallback while the current fallback buffer still has data remaining
47 if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
48 throw new ArgumentException(
49 SR.Argument_FallbackBufferNotEmpty, nameof(value));
// Discard the existing buffer; the FallbackBuffer property creates a new one whenever
// this field is null.
52 _fallbackBuffer = null;
56 // Note: we don't test for threading here because async access to Encoders and Decoders
57 // doesn't work anyway.
// Returns the fallback buffer for this decoder, creating it the first time it is needed.
58 public DecoderFallbackBuffer FallbackBuffer
// Only create a buffer when none exists yet.
62 if (_fallbackBuffer == null)
// Use the configured fallback when there is one...
64 if (_fallback != null)
65 _fallbackBuffer = _fallback.CreateFallbackBuffer();
// ...otherwise use the replacement fallback.
// NOTE(review): the `else` joining these two assignments is presumably elided from this
// listing (embedded line 66 is missing) -- confirm.
67 _fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
70 return _fallbackBuffer;
// Reports whether a fallback buffer has already been created. It reads the field directly,
// so unlike the FallbackBuffer property it never creates one.
74 internal bool InternalHasFallbackBuffer
78 return _fallbackBuffer != null;
84 // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder. This
85 // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
87 // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
89 // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
90 // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
// Default reset: decodes an empty byte array with flush == true, then resets the fallback
// buffer if one has been created.
91 public virtual void Reset()
93 byte[] byteTemp = Array.Empty<byte>();
// Size the scratch output using GetCharCount for the same empty, flushing input.
94 char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
// The return value and the decoded chars are discarded; only the flushing call matters.
95 GetChars(byteTemp, 0, 0, charTemp, 0, true);
// Null-conditional call: nothing to reset when no fallback buffer exists.
96 _fallbackBuffer?.Reset();
99 // Returns the number of characters the next call to GetChars will
100 // produce if presented with the given range of bytes. The returned value
101 // takes into account the state in which the decoder was left following the
102 // last call to GetChars. The state of the decoder is not affected
103 // by a call to this method.
// Abstract: every concrete decoder must supply this overload. The other GetCharCount
// overloads visible in this class forward to it, directly or indirectly.
105 public abstract int GetCharCount(byte[] bytes, int index, int count);
// Overload with a flush flag. This default implementation ignores `flush` and forwards to
// the three-argument overload; it is virtual, so a derived decoder can override it.
107 public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
109 return GetCharCount(bytes, index, count);
112 // We expect this to be the workhorse for NLS Encodings, but for existing
113 // ones we need a working (if slow) default implementation.
// Pointer-based overload. The default implementation copies `count` bytes from `bytes`
// into a temporary managed array and delegates to the array-based overload.
// NOTE(review): the conditions guarding the two throws and the declaration of `index` are
// not visible in this listing (embedded lines 118, 122 and 127-128 are missing).
114 [CLSCompliant(false)]
115 public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
117 // Validate input parameters
// Presumably thrown when `bytes` is null -- the guard line is elided; confirm.
119 throw new ArgumentNullException(nameof(bytes),
120 SR.ArgumentNull_Array);
// Presumably thrown when `count` is negative (the message asks for a non-negative number) --
// the guard line is elided; confirm.
123 throw new ArgumentOutOfRangeException(nameof(count),
124 SR.ArgumentOutOfRange_NeedNonNegNum);
// Managed copy of the caller's buffer, so the array-based overload can be used.
126 byte[] arrbyte = new byte[count];
129 for (index = 0; index < count; index++)
130 arrbyte[index] = bytes[index];
// NOTE(review): `flush` is not passed on here; the three-argument overload is called.
132 return GetCharCount(arrbyte, 0, count);
// Span-based overload: pins the span and forwards to the pointer-based overload, passing
// bytes.Length as the count and `flush` unchanged.
135 public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes, bool flush)
// NOTE(review): GetNonNullPinnableReference presumably yields a non-null reference even for
// an empty span (going by its name), which would avoid the pointer overload's null-argument
// exception -- confirm.
137 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
139 return GetCharCount(bytesPtr, bytes.Length, flush);
143 // Decodes a range of bytes in a byte array into a range of characters
144 // in a character array. The method decodes byteCount bytes from
145 // bytes starting at index byteIndex, storing the resulting
146 // characters in chars starting at index charIndex. The
147 // decoding takes into account the state in which the decoder was left
148 // following the last call to this method.
150 // An exception occurs if the character array is not large enough to
151 // hold the complete decoding of the bytes. The GetCharCount method
152 // can be used to determine the exact number of characters that will be
153 // produced for a given range of bytes. Alternatively, the
154 // GetMaxCharCount method of the Encoding that produced this
155 // decoder can be used to determine the maximum number of characters that
156 // will be produced for a given number of bytes, regardless of the actual
// Abstract: every concrete decoder must supply this overload. The other GetChars overloads
// visible in this class forward to it, directly or indirectly. Returns an int.
159 public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
160 char[] chars, int charIndex);
// Overload with a flush flag. This default implementation ignores `flush` and forwards to
// the five-argument overload; it is virtual, so a derived decoder can override it.
162 public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
163 char[] chars, int charIndex, bool flush)
165 return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
168 // We expect this to be the workhorse for NLS Encodings, but for existing
169 // ones we need a working (if slow) default implementation.
171 // WARNING WARNING WARNING
173 // WARNING: If this breaks it could be a security threat. Obviously we
174 // call this internally, so you need to make sure that your pointers, counts
175 // and indexes are correct when you call this method.
177 // In addition, we have internal code, which will be marked as "safe" calling
178 // this code. However this code is dependent upon the implementation of an
179 // external GetChars() method, which could be overridden by a third party and
180 // the results of which cannot be guaranteed. We use that result to copy
181 // the char[] to our char* output buffer. If the result count was wrong, we
182 // could easily overflow our output buffer. Therefore we do an extra test
183 // when we copy the buffer so that we don't overflow charCount either.
// Pointer-based overload. The default implementation copies the input bytes into a managed
// array, decodes into a managed char array of length charCount through the array-based
// overload, and copies the chars back out through the `chars` pointer.
// NOTE(review): some lines are not visible in this listing: the declaration of `index`, the
// statement controlled by `if (result < charCount)`, and the return statement.
184 [CLSCompliant(false)]
185 public virtual unsafe int GetChars(byte* bytes, int byteCount,
186 char* chars, int charCount, bool flush)
188 // Validate input parameters
// Neither pointer may be null; `chars` is reported first when both are null.
189 if (chars == null || bytes == null)
190 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
191 SR.ArgumentNull_Array);
// Neither count may be negative; `byteCount` is reported first when both are negative.
193 if (byteCount < 0 || charCount < 0)
194 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
195 SR.ArgumentOutOfRange_NeedNonNegNum);
197 // Get the byte array to convert
198 byte[] arrByte = new byte[byteCount];
201 for (index = 0; index < byteCount; index++)
202 arrByte[index] = bytes[index];
204 // Get the char array to fill
205 char[] arrChar = new char[charCount];
// Delegate to the array-based overload; `flush` is passed through.
208 int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
// Debug-only check; the copy loop below is bounded by charCount, not by `result`.
210 Debug.Assert(result <= charCount, "Returned more chars than we have space for");
212 // Copy the char array
213 // WARNING: We MUST make sure that we don't copy too many chars. We can't
214 // rely on result because it could be a 3rd party implementation. We need
215 // to make sure we never copy more than charCount chars no matter the value
// NOTE(review): the statement this `if` controls is elided; presumably it lowers charCount
// to `result` so that only the decoded chars are copied -- confirm.
217 if (result < charCount)
220 // We check both result and charCount so that we don't accidentally overrun
221 // our pointer buffer just because of an issue in GetChars
222 for (index = 0; index < charCount; index++)
223 chars[index] = arrChar[index];
// Span-based overload: pins both spans and forwards to the pointer-based overload, passing
// each span's Length as the matching count and `flush` unchanged.
228 public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)
// NOTE(review): GetNonNullPinnableReference presumably yields non-null references even for
// empty spans (going by its name), which would avoid the pointer overload's null-argument
// exception -- confirm.
230 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
231 fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
233 return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, flush);
237 // This method is used when the output buffer might not be large enough.
238 // It will decode until it runs out of bytes, and then it will return
239 // true if the entire input was converted. In either case it
240 // will also return the number of converted bytes and output characters used.
241 // It will only throw a buffer overflow exception if the entire length of chars[] is
242 // too small to store the next char (like 0, or maybe 1 or 4 for some encodings).
243 // We're done processing this buffer only if completed returns true.
245 // Might consider checking Max...Count to avoid the extra counting step.
247 // Note that if not all of the input bytes are consumed, then we'll do a /2, which means
248 // that it's likely that we didn't consume as many bytes as we could have. For some
249 // applications this could be slow (like trying to exactly fill an output buffer from a bigger stream).
// Array-based Convert. After validating its arguments, it looks for a number of input bytes
// (starting from byteCount) whose decoded length fits in charCount, decodes that many, and
// reports the outcome through bytesUsed, charsUsed and completed.
// NOTE(review): some statements are not visible in this listing: the return after a
// successful decode, and the retry step that follows the "Try again with 1/2 the count"
// comment (presumably halving bytesUsed and clearing flush -- confirm).
250 public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
251 char[] chars, int charIndex, int charCount, bool flush,
252 out int bytesUsed, out int charsUsed, out bool completed)
254 // Validate parameters
// Neither array may be null; `bytes` is reported first when both are null.
255 if (bytes == null || chars == null)
256 throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
257 SR.ArgumentNull_Array);
259 if (byteIndex < 0 || byteCount < 0)
260 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
261 SR.ArgumentOutOfRange_NeedNonNegNum);
263 if (charIndex < 0 || charCount < 0)
264 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
265 SR.ArgumentOutOfRange_NeedNonNegNum);
// The requested range [byteIndex, byteIndex + byteCount) must lie inside `bytes`.
267 if (bytes.Length - byteIndex < byteCount)
268 throw new ArgumentOutOfRangeException(nameof(bytes),
269 SR.ArgumentOutOfRange_IndexCountBuffer);
// The requested range [charIndex, charIndex + charCount) must lie inside `chars`.
271 if (chars.Length - charIndex < charCount)
272 throw new ArgumentOutOfRangeException(nameof(chars),
273 SR.ArgumentOutOfRange_IndexCountBuffer);
// Start by trying to consume the whole input.
275 bytesUsed = byteCount;
277 // It's easy to do if it won't overrun our buffer.
278 while (bytesUsed > 0)
// Count first, so GetChars is only called when the output is known to fit in charCount.
280 if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
282 charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
// Completed only when every input byte was consumed and the fallback buffer (if one
// exists) has nothing remaining.
283 completed = (bytesUsed == byteCount &&
284 (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
288 // Try again with 1/2 the count, won't flush then 'cause won't read it all
293 // Oops, we didn't have anything, we'll have to throw an overflow
294 throw new ArgumentException(SR.Argument_ConversionOverflow);
297 // This is the version that uses pointers (byte* and char*).
298 // We're done processing this buffer only if completed returns true.
300 // Might consider checking Max...Count to avoid the extra counting step.
302 // Note that if not all of the input bytes are consumed, then we'll do a /2, which means
303 // that it's likely that we didn't consume as many bytes as we could have. For some
304 // applications this could be slow (like trying to exactly fill an output buffer from a bigger stream).
// Pointer-based Convert. After validating its arguments, it looks for a number of input
// bytes (starting from byteCount) whose decoded length fits in charCount, decodes that many
// through the pointer-based GetChars, and reports the outcome through bytesUsed, charsUsed
// and completed.
// NOTE(review): some statements are not visible in this listing: the return after a
// successful decode, and the retry step that follows the "Try again with 1/2 the count"
// comment (presumably halving bytesUsed and clearing flush -- confirm).
305 [CLSCompliant(false)]
306 public virtual unsafe void Convert(byte* bytes, int byteCount,
307 char* chars, int charCount, bool flush,
308 out int bytesUsed, out int charsUsed, out bool completed)
310 // Validate input parameters
// Neither pointer may be null; `chars` is reported first when both are null.
311 if (chars == null || bytes == null)
312 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
313 SR.ArgumentNull_Array);
// Neither count may be negative; `byteCount` is reported first when both are negative.
315 if (byteCount < 0 || charCount < 0)
316 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
317 SR.ArgumentOutOfRange_NeedNonNegNum);
319 // Get ready to do it
320 bytesUsed = byteCount;
322 // It's easy to do if it won't overrun our buffer.
323 while (bytesUsed > 0)
// Count first, so GetChars is only called when the output is known to fit in charCount.
325 if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
327 charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
// Completed only when every input byte was consumed and the fallback buffer (if one
// exists) has nothing remaining.
328 completed = (bytesUsed == byteCount &&
329 (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
333 // Try again with 1/2 the count, won't flush then 'cause won't read it all
338 // Oops, we didn't have anything, we'll have to throw an overflow
339 throw new ArgumentException(SR.Argument_ConversionOverflow);
// Span-based Convert: pins both spans and forwards to the pointer-based Convert, passing
// each span's Length as the matching count. `flush` and the three out parameters are
// handed straight through.
342 public virtual unsafe void Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush, out int bytesUsed, out int charsUsed, out bool completed)
// NOTE(review): GetNonNullPinnableReference presumably yields non-null references even for
// empty spans (going by its name), which would avoid the pointer overload's null-argument
// exception -- confirm.
344 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
345 fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
347 Convert(bytesPtr, bytes.Length, charsPtr, chars.Length, flush, out bytesUsed, out charsUsed, out completed);