src/mscorlib/shared/System/Text/Decoder.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Text;
   6 using System;
   7 using System.Diagnostics;
   8 using System.Diagnostics.Contracts;
   9
  10 namespace System.Text
  11 {
  12     // A Decoder is used to decode a sequence of blocks of bytes into a
  13     // sequence of blocks of characters. Following instantiation of a decoder,
  14     // sequential blocks of bytes are converted into blocks of characters through
  15     // calls to the GetChars method. The decoder maintains state between the
  16     // conversions, allowing it to correctly decode byte sequences that span
  17     // adjacent blocks.
  18     //
  19     // Instances of specific implementations of the Decoder abstract base
  20     // class are typically obtained through calls to the GetDecoder method
  21     // of Encoding objects.
  22     //
  23     public abstract class Decoder
  24     {
  25         internal DecoderFallback _fallback = null;
  26
  27         internal DecoderFallbackBuffer _fallbackBuffer = null;
  28
  29         protected Decoder()
  30         {
  31             // We don't call default reset because default reset probably isn't good if we aren't initialized.
  32         }
  33
  34         public DecoderFallback Fallback
  35         {
  36             get
  37             {
  38                 return _fallback;
  39             }
  40
  41             set
  42             {
  43                 if (value == null)
  44                     throw new ArgumentNullException(nameof(value));
  45                 Contract.EndContractBlock();
  46
  47                 // Can't change fallback if buffer is wrong
  48                 if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
  49                     throw new ArgumentException(
  50                       SR.Argument_FallbackBufferNotEmpty, nameof(value));
  51
  52                 _fallback = value;
  53                 _fallbackBuffer = null;
  54             }
  55         }
  56
  57         // Note: we don't test for threading here because async access to Encoders and Decoders
  58         // doesn't work anyway.
  59         public DecoderFallbackBuffer FallbackBuffer
  60         {
  61             get
  62             {
  63                 if (_fallbackBuffer == null)
  64                 {
  65                     if (_fallback != null)
  66                         _fallbackBuffer = _fallback.CreateFallbackBuffer();
  67                     else
  68                         _fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
  69                 }
  70
  71                 return _fallbackBuffer;
  72             }
  73         }
  74
  75         internal bool InternalHasFallbackBuffer
  76         {
  77             get
  78             {
  79                 return _fallbackBuffer != null;
  80             }
  81         }
  82
  83         // Reset the Decoder
  84         //
  85         // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder.  This
  86         // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  87         //
  88         // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
  89         //
  90         // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
  91         // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
  92         public virtual void Reset()
  93         {
  94             byte[] byteTemp = Array.Empty<byte>();
  95             char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
  96             GetChars(byteTemp, 0, 0, charTemp, 0, true);
  97             _fallbackBuffer?.Reset();
  98         }
  99
 100         // Returns the number of characters the next call to GetChars will
 101         // produce if presented with the given range of bytes. The returned value
 102         // takes into account the state in which the decoder was left following the
 103         // last call to GetChars. The state of the decoder is not affected
 104         // by a call to this method.
 105         //
 106         public abstract int GetCharCount(byte[] bytes, int index, int count);
 107
 108         public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
 109         {
 110             return GetCharCount(bytes, index, count);
 111         }
 112
 113         // We expect this to be the workhorse for NLS Encodings, but for existing
 114         // ones we need a working (if slow) default implementation)
 115         [CLSCompliant(false)]
 116         public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
 117         {
 118             // Validate input parameters
 119             if (bytes == null)
 120                 throw new ArgumentNullException(nameof(bytes),
 121                       SR.ArgumentNull_Array);
 122
 123             if (count < 0)
 124                 throw new ArgumentOutOfRangeException(nameof(count),
 125                       SR.ArgumentOutOfRange_NeedNonNegNum);
 126             Contract.EndContractBlock();
 127
 128             byte[] arrbyte = new byte[count];
 129             int index;
 130
 131             for (index = 0; index < count; index++)
 132                 arrbyte[index] = bytes[index];
 133
 134             return GetCharCount(arrbyte, 0, count);
 135         }
 136
 137         // Decodes a range of bytes in a byte array into a range of characters
 138         // in a character array. The method decodes byteCount bytes from
 139         // bytes starting at index byteIndex, storing the resulting
 140         // characters in chars starting at index charIndex. The
 141         // decoding takes into account the state in which the decoder was left
 142         // following the last call to this method.
 143         //
 144         // An exception occurs if the character array is not large enough to
 145         // hold the complete decoding of the bytes. The GetCharCount method
 146         // can be used to determine the exact number of characters that will be
 147         // produced for a given range of bytes. Alternatively, the
 148         // GetMaxCharCount method of the Encoding that produced this
 149         // decoder can be used to determine the maximum number of characters that
 150         // will be produced for a given number of bytes, regardless of the actual
 151         // byte values.
 152         //
 153         public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
 154                                         char[] chars, int charIndex);
 155
 156         public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
 157                                        char[] chars, int charIndex, bool flush)
 158         {
 159             return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
 160         }
 161
 162         // We expect this to be the workhorse for NLS Encodings, but for existing
 163         // ones we need a working (if slow) default implementation)
 164         //
 165         // WARNING WARNING WARNING
 166         //
 167         // WARNING: If this breaks it could be a security threat.  Obviously we
 168         // call this internally, so you need to make sure that your pointers, counts
 169         // and indexes are correct when you call this method.
 170         //
 171         // In addition, we have internal code, which will be marked as "safe" calling
 172         // this code.  However this code is dependent upon the implementation of an
 173         // external GetChars() method, which could be overridden by a third party and
 174         // the results of which cannot be guaranteed.  We use that result to copy
 175         // the char[] to our char* output buffer.  If the result count was wrong, we
 176         // could easily overflow our output buffer.  Therefore we do an extra test
 177         // when we copy the buffer so that we don't overflow charCount either.
 178         [CLSCompliant(false)]
 179         public virtual unsafe int GetChars(byte* bytes, int byteCount,
 180                                               char* chars, int charCount, bool flush)
 181         {
 182             // Validate input parameters
 183             if (chars == null || bytes == null)
 184                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
 185                     SR.ArgumentNull_Array);
 186
 187             if (byteCount < 0 || charCount < 0)
 188                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
 189                     SR.ArgumentOutOfRange_NeedNonNegNum);
 190             Contract.EndContractBlock();
 191
 192             // Get the byte array to convert
 193             byte[] arrByte = new byte[byteCount];
 194
 195             int index;
 196             for (index = 0; index < byteCount; index++)
 197                 arrByte[index] = bytes[index];
 198
 199             // Get the char array to fill
 200             char[] arrChar = new char[charCount];
 201
 202             // Do the work
 203             int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
 204
 205             Debug.Assert(result <= charCount, "Returned more chars than we have space for");
 206
 207             // Copy the char array
 208             // WARNING: We MUST make sure that we don't copy too many chars.  We can't
 209             // rely on result because it could be a 3rd party implementation.  We need
 210             // to make sure we never copy more than charCount chars no matter the value
 211             // of result
 212             if (result < charCount)
 213                 charCount = result;
 214
 215             // We check both result and charCount so that we don't accidentally overrun
 216             // our pointer buffer just because of an issue in GetChars
 217             for (index = 0; index < charCount; index++)
 218                 chars[index] = arrChar[index];
 219
 220             return charCount;
 221         }
 222
 223         // This method is used when the output buffer might not be large enough.
 224         // It will decode until it runs out of bytes, and then it will return
 225         // true if it the entire input was converted.  In either case it
 226         // will also return the number of converted bytes and output characters used.
 227         // It will only throw a buffer overflow exception if the entire lenght of chars[] is
 228         // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
 229         // We're done processing this buffer only if completed returns true.
 230         //
 231         // Might consider checking Max...Count to avoid the extra counting step.
 232         //
 233         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
 234         // that its likely that we didn't consume as many bytes as we could have.  For some
 235         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 236         public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
 237                                       char[] chars, int charIndex, int charCount, bool flush,
 238                                       out int bytesUsed, out int charsUsed, out bool completed)
 239         {
 240             // Validate parameters
 241             if (bytes == null || chars == null)
 242                 throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
 243                       SR.ArgumentNull_Array);
 244
 245             if (byteIndex < 0 || byteCount < 0)
 246                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
 247                       SR.ArgumentOutOfRange_NeedNonNegNum);
 248
 249             if (charIndex < 0 || charCount < 0)
 250                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
 251                       SR.ArgumentOutOfRange_NeedNonNegNum);
 252
 253             if (bytes.Length - byteIndex < byteCount)
 254                 throw new ArgumentOutOfRangeException(nameof(bytes),
 255                       SR.ArgumentOutOfRange_IndexCountBuffer);
 256
 257             if (chars.Length - charIndex < charCount)
 258                 throw new ArgumentOutOfRangeException(nameof(chars),
 259                       SR.ArgumentOutOfRange_IndexCountBuffer);
 260             Contract.EndContractBlock();
 261
 262             bytesUsed = byteCount;
 263
 264             // Its easy to do if it won't overrun our buffer.
 265             while (bytesUsed > 0)
 266             {
 267                 if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
 268                 {
 269                     charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
 270                     completed = (bytesUsed == byteCount &&
 271                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
 272                     return;
 273                 }
 274
 275                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 276                 flush = false;
 277                 bytesUsed /= 2;
 278             }
 279
 280             // Oops, we didn't have anything, we'll have to throw an overflow
 281             throw new ArgumentException(SR.Argument_ConversionOverflow);
 282         }
 283
 284         // This is the version that uses *.
 285         // We're done processing this buffer only if completed returns true.
 286         //
 287         // Might consider checking Max...Count to avoid the extra counting step.
 288         //
 289         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
 290         // that its likely that we didn't consume as many bytes as we could have.  For some
 291         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 292         [CLSCompliant(false)]
 293         public virtual unsafe void Convert(byte* bytes, int byteCount,
 294                                              char* chars, int charCount, bool flush,
 295                                              out int bytesUsed, out int charsUsed, out bool completed)
 296         {
 297             // Validate input parameters
 298             if (chars == null || bytes == null)
 299                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
 300                     SR.ArgumentNull_Array);
 301
 302             if (byteCount < 0 || charCount < 0)
 303                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
 304                     SR.ArgumentOutOfRange_NeedNonNegNum);
 305             Contract.EndContractBlock();
 306
 307             // Get ready to do it
 308             bytesUsed = byteCount;
 309
 310             // Its easy to do if it won't overrun our buffer.
 311             while (bytesUsed > 0)
 312             {
 313                 if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
 314                 {
 315                     charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
 316                     completed = (bytesUsed == byteCount &&
 317                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
 318                     return;
 319                 }
 320
 321                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 322                 flush = false;
 323                 bytesUsed /= 2;
 324             }
 325
 326             // Oops, we didn't have anything, we'll have to throw an overflow
 327             throw new ArgumentException(SR.Argument_ConversionOverflow);
 328         }
 329     }
 330 }