src/mscorlib/shared/System/Text/Decoder.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Runtime.Serialization;
   6 using System.Text;
   7 using System;
   8 using System.Diagnostics;
   9 using System.Diagnostics.Contracts;
  10
  11 namespace System.Text
  12 {
  13     // A Decoder is used to decode a sequence of blocks of bytes into a
  14     // sequence of blocks of characters. Following instantiation of a decoder,
  15     // sequential blocks of bytes are converted into blocks of characters through
  16     // calls to the GetChars method. The decoder maintains state between the
  17     // conversions, allowing it to correctly decode byte sequences that span
  18     // adjacent blocks.
  19     //
  20     // Instances of specific implementations of the Decoder abstract base
  21     // class are typically obtained through calls to the GetDecoder method
  22     // of Encoding objects.
  23     //
  24     public abstract class Decoder
  25     {
  26         internal DecoderFallback m_fallback = null;
  27
  28         [NonSerialized]
  29         internal DecoderFallbackBuffer m_fallbackBuffer = null;
  30
  31         internal void SerializeDecoder(SerializationInfo info)
  32         {
  33             info.AddValue("m_fallback", this.m_fallback);
  34         }
  35
  36         protected Decoder()
  37         {
  38             // We don't call default reset because default reset probably isn't good if we aren't initialized.
  39         }
  40
  41         public DecoderFallback Fallback
  42         {
  43             get
  44             {
  45                 return m_fallback;
  46             }
  47
  48             set
  49             {
  50                 if (value == null)
  51                     throw new ArgumentNullException(nameof(value));
  52                 Contract.EndContractBlock();
  53
  54                 // Can't change fallback if buffer is wrong
  55                 if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
  56                     throw new ArgumentException(
  57                       SR.Argument_FallbackBufferNotEmpty, nameof(value));
  58
  59                 m_fallback = value;
  60                 m_fallbackBuffer = null;
  61             }
  62         }
  63
  64         // Note: we don't test for threading here because async access to Encoders and Decoders
  65         // doesn't work anyway.
  66         public DecoderFallbackBuffer FallbackBuffer
  67         {
  68             get
  69             {
  70                 if (m_fallbackBuffer == null)
  71                 {
  72                     if (m_fallback != null)
  73                         m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
  74                     else
  75                         m_fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
  76                 }
  77
  78                 return m_fallbackBuffer;
  79             }
  80         }
  81
  82         internal bool InternalHasFallbackBuffer
  83         {
  84             get
  85             {
  86                 return m_fallbackBuffer != null;
  87             }
  88         }
  89
  90         // Reset the Decoder
  91         //
  92         // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder.  This
  93         // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  94         //
  95         // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
  96         //
  97         // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
  98         // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
  99         public virtual void Reset()
 100         {
 101             byte[] byteTemp = Array.Empty<byte>();
 102             char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
 103             GetChars(byteTemp, 0, 0, charTemp, 0, true);
 104             if (m_fallbackBuffer != null)
 105                 m_fallbackBuffer.Reset();
 106         }
 107
 108         // Returns the number of characters the next call to GetChars will
 109         // produce if presented with the given range of bytes. The returned value
 110         // takes into account the state in which the decoder was left following the
 111         // last call to GetChars. The state of the decoder is not affected
 112         // by a call to this method.
 113         //
 114         public abstract int GetCharCount(byte[] bytes, int index, int count);
 115
 116         public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
 117         {
 118             return GetCharCount(bytes, index, count);
 119         }
 120
 121         // We expect this to be the workhorse for NLS Encodings, but for existing
 122         // ones we need a working (if slow) default implementation)
 123         [CLSCompliant(false)]
 124         public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
 125         {
 126             // Validate input parameters
 127             if (bytes == null)
 128                 throw new ArgumentNullException(nameof(bytes),
 129                       SR.ArgumentNull_Array);
 130
 131             if (count < 0)
 132                 throw new ArgumentOutOfRangeException(nameof(count),
 133                       SR.ArgumentOutOfRange_NeedNonNegNum);
 134             Contract.EndContractBlock();
 135
 136             byte[] arrbyte = new byte[count];
 137             int index;
 138
 139             for (index = 0; index < count; index++)
 140                 arrbyte[index] = bytes[index];
 141
 142             return GetCharCount(arrbyte, 0, count);
 143         }
 144
 145         // Decodes a range of bytes in a byte array into a range of characters
 146         // in a character array. The method decodes byteCount bytes from
 147         // bytes starting at index byteIndex, storing the resulting
 148         // characters in chars starting at index charIndex. The
 149         // decoding takes into account the state in which the decoder was left
 150         // following the last call to this method.
 151         //
 152         // An exception occurs if the character array is not large enough to
 153         // hold the complete decoding of the bytes. The GetCharCount method
 154         // can be used to determine the exact number of characters that will be
 155         // produced for a given range of bytes. Alternatively, the
 156         // GetMaxCharCount method of the Encoding that produced this
 157         // decoder can be used to determine the maximum number of characters that
 158         // will be produced for a given number of bytes, regardless of the actual
 159         // byte values.
 160         //
 161         public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
 162                                         char[] chars, int charIndex);
 163
 164         public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
 165                                        char[] chars, int charIndex, bool flush)
 166         {
 167             return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
 168         }
 169
 170         // We expect this to be the workhorse for NLS Encodings, but for existing
 171         // ones we need a working (if slow) default implementation)
 172         //
 173         // WARNING WARNING WARNING
 174         //
 175         // WARNING: If this breaks it could be a security threat.  Obviously we
 176         // call this internally, so you need to make sure that your pointers, counts
 177         // and indexes are correct when you call this method.
 178         //
 179         // In addition, we have internal code, which will be marked as "safe" calling
 180         // this code.  However this code is dependent upon the implementation of an
 181         // external GetChars() method, which could be overridden by a third party and
 182         // the results of which cannot be guaranteed.  We use that result to copy
 183         // the char[] to our char* output buffer.  If the result count was wrong, we
 184         // could easily overflow our output buffer.  Therefore we do an extra test
 185         // when we copy the buffer so that we don't overflow charCount either.
 186         [CLSCompliant(false)]
 187         public virtual unsafe int GetChars(byte* bytes, int byteCount,
 188                                               char* chars, int charCount, bool flush)
 189         {
 190             // Validate input parameters
 191             if (chars == null || bytes == null)
 192                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
 193                     SR.ArgumentNull_Array);
 194
 195             if (byteCount < 0 || charCount < 0)
 196                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
 197                     SR.ArgumentOutOfRange_NeedNonNegNum);
 198             Contract.EndContractBlock();
 199
 200             // Get the byte array to convert
 201             byte[] arrByte = new byte[byteCount];
 202
 203             int index;
 204             for (index = 0; index < byteCount; index++)
 205                 arrByte[index] = bytes[index];
 206
 207             // Get the char array to fill
 208             char[] arrChar = new char[charCount];
 209
 210             // Do the work
 211             int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
 212
 213             Debug.Assert(result <= charCount, "Returned more chars than we have space for");
 214
 215             // Copy the char array
 216             // WARNING: We MUST make sure that we don't copy too many chars.  We can't
 217             // rely on result because it could be a 3rd party implementation.  We need
 218             // to make sure we never copy more than charCount chars no matter the value
 219             // of result
 220             if (result < charCount)
 221                 charCount = result;
 222
 223             // We check both result and charCount so that we don't accidentally overrun
 224             // our pointer buffer just because of an issue in GetChars
 225             for (index = 0; index < charCount; index++)
 226                 chars[index] = arrChar[index];
 227
 228             return charCount;
 229         }
 230
 231         // This method is used when the output buffer might not be large enough.
 232         // It will decode until it runs out of bytes, and then it will return
 233         // true if it the entire input was converted.  In either case it
 234         // will also return the number of converted bytes and output characters used.
 235         // It will only throw a buffer overflow exception if the entire lenght of chars[] is
 236         // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
 237         // We're done processing this buffer only if completed returns true.
 238         //
 239         // Might consider checking Max...Count to avoid the extra counting step.
 240         //
 241         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
 242         // that its likely that we didn't consume as many bytes as we could have.  For some
 243         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 244         public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
 245                                       char[] chars, int charIndex, int charCount, bool flush,
 246                                       out int bytesUsed, out int charsUsed, out bool completed)
 247         {
 248             // Validate parameters
 249             if (bytes == null || chars == null)
 250                 throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
 251                       SR.ArgumentNull_Array);
 252
 253             if (byteIndex < 0 || byteCount < 0)
 254                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
 255                       SR.ArgumentOutOfRange_NeedNonNegNum);
 256
 257             if (charIndex < 0 || charCount < 0)
 258                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
 259                       SR.ArgumentOutOfRange_NeedNonNegNum);
 260
 261             if (bytes.Length - byteIndex < byteCount)
 262                 throw new ArgumentOutOfRangeException(nameof(bytes),
 263                       SR.ArgumentOutOfRange_IndexCountBuffer);
 264
 265             if (chars.Length - charIndex < charCount)
 266                 throw new ArgumentOutOfRangeException(nameof(chars),
 267                       SR.ArgumentOutOfRange_IndexCountBuffer);
 268             Contract.EndContractBlock();
 269
 270             bytesUsed = byteCount;
 271
 272             // Its easy to do if it won't overrun our buffer.
 273             while (bytesUsed > 0)
 274             {
 275                 if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
 276                 {
 277                     charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
 278                     completed = (bytesUsed == byteCount &&
 279                         (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
 280                     return;
 281                 }
 282
 283                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 284                 flush = false;
 285                 bytesUsed /= 2;
 286             }
 287
 288             // Oops, we didn't have anything, we'll have to throw an overflow
 289             throw new ArgumentException(SR.Argument_ConversionOverflow);
 290         }
 291
 292         // This is the version that uses *.
 293         // We're done processing this buffer only if completed returns true.
 294         //
 295         // Might consider checking Max...Count to avoid the extra counting step.
 296         //
 297         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
 298         // that its likely that we didn't consume as many bytes as we could have.  For some
 299         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 300         [CLSCompliant(false)]
 301         public virtual unsafe void Convert(byte* bytes, int byteCount,
 302                                              char* chars, int charCount, bool flush,
 303                                              out int bytesUsed, out int charsUsed, out bool completed)
 304         {
 305             // Validate input parameters
 306             if (chars == null || bytes == null)
 307                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
 308                     SR.ArgumentNull_Array);
 309
 310             if (byteCount < 0 || charCount < 0)
 311                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
 312                     SR.ArgumentOutOfRange_NeedNonNegNum);
 313             Contract.EndContractBlock();
 314
 315             // Get ready to do it
 316             bytesUsed = byteCount;
 317
 318             // Its easy to do if it won't overrun our buffer.
 319             while (bytesUsed > 0)
 320             {
 321                 if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
 322                 {
 323                     charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
 324                     completed = (bytesUsed == byteCount &&
 325                         (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
 326                     return;
 327                 }
 328
 329                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 330                 flush = false;
 331                 bytesUsed /= 2;
 332             }
 333
 334             // Oops, we didn't have anything, we'll have to throw an overflow
 335             throw new ArgumentException(SR.Argument_ConversionOverflow);
 336         }
 337     }
 338 }