src/mscorlib/shared/System/Text/Encoder.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Runtime.Serialization;
   6 using System.Text;
   7 using System;
   8 using System.Diagnostics;
   9 using System.Diagnostics.Contracts;
  10
  11 namespace System.Text
  12 {
  13     // An Encoder is used to encode a sequence of blocks of characters into
  14     // a sequence of blocks of bytes. Following instantiation of an encoder,
  15     // sequential blocks of characters are converted into blocks of bytes through
  16     // calls to the GetBytes method. The encoder maintains state between the
  17     // conversions, allowing it to correctly encode character sequences that span
  18     // adjacent blocks.
  19     //
  20     // Instances of specific implementations of the Encoder abstract base
  21     // class are typically obtained through calls to the GetEncoder method
  22     // of Encoding objects.
  23     //
  24     public abstract class Encoder
  25     {
  26         internal EncoderFallback m_fallback = null;
  27
  28         [NonSerialized]
  29         internal EncoderFallbackBuffer m_fallbackBuffer = null;
  30
  31         internal void SerializeEncoder(SerializationInfo info)
  32         {
  33             info.AddValue("m_fallback", this.m_fallback);
  34         }
  35
  36         protected Encoder()
  37         {
  38             // We don't call default reset because default reset probably isn't good if we aren't initialized.
  39         }
  40
  41         public EncoderFallback Fallback
  42         {
  43             get
  44             {
  45                 return m_fallback;
  46             }
  47
  48             set
  49             {
  50                 if (value == null)
  51                     throw new ArgumentNullException(nameof(value));
  52                 Contract.EndContractBlock();
  53
  54                 // Can't change fallback if buffer is wrong
  55                 if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
  56                     throw new ArgumentException(
  57                       SR.Argument_FallbackBufferNotEmpty, nameof(value));
  58
  59                 m_fallback = value;
  60                 m_fallbackBuffer = null;
  61             }
  62         }
  63
  64         // Note: we don't test for threading here because async access to Encoders and Decoders
  65         // doesn't work anyway.
  66         public EncoderFallbackBuffer FallbackBuffer
  67         {
  68             get
  69             {
  70                 if (m_fallbackBuffer == null)
  71                 {
  72                     if (m_fallback != null)
  73                         m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
  74                     else
  75                         m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer();
  76                 }
  77
  78                 return m_fallbackBuffer;
  79             }
  80         }
  81
  82         internal bool InternalHasFallbackBuffer
  83         {
  84             get
  85             {
  86                 return m_fallbackBuffer != null;
  87             }
  88         }
  89
  90         // Reset the Encoder
  91         //
  92         // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder.  This
  93         // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  94         //
  95         // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
  96         //
  97         // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
  98         // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
  99         public virtual void Reset()
 100         {
 101             char[] charTemp = { };
 102             byte[] byteTemp = new byte[GetByteCount(charTemp, 0, 0, true)];
 103             GetBytes(charTemp, 0, 0, byteTemp, 0, true);
 104             if (m_fallbackBuffer != null)
 105                 m_fallbackBuffer.Reset();
 106         }
 107
 108         // Returns the number of bytes the next call to GetBytes will
 109         // produce if presented with the given range of characters and the given
 110         // value of the flush parameter. The returned value takes into
 111         // account the state in which the encoder was left following the last call
 112         // to GetBytes. The state of the encoder is not affected by a call
 113         // to this method.
 114         //
 115         public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
 116
 117         // We expect this to be the workhorse for NLS encodings
 118         // unfortunately for existing overrides, it has to call the [] version,
 119         // which is really slow, so avoid this method if you might be calling external encodings.
 120         [CLSCompliant(false)]
 121         public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
 122         {
 123             // Validate input parameters
 124             if (chars == null)
 125                 throw new ArgumentNullException(nameof(chars),
 126                       SR.ArgumentNull_Array);
 127
 128             if (count < 0)
 129                 throw new ArgumentOutOfRangeException(nameof(count),
 130                       SR.ArgumentOutOfRange_NeedNonNegNum);
 131             Contract.EndContractBlock();
 132
 133             char[] arrChar = new char[count];
 134             int index;
 135
 136             for (index = 0; index < count; index++)
 137                 arrChar[index] = chars[index];
 138
 139             return GetByteCount(arrChar, 0, count, flush);
 140         }
 141
 142         // Encodes a range of characters in a character array into a range of bytes
 143         // in a byte array. The method encodes charCount characters from
 144         // chars starting at index charIndex, storing the resulting
 145         // bytes in bytes starting at index byteIndex. The encoding
 146         // takes into account the state in which the encoder was left following the
 147         // last call to this method. The flush parameter indicates whether
 148         // the encoder should flush any shift-states and partial characters at the
 149         // end of the conversion. To ensure correct termination of a sequence of
 150         // blocks of encoded bytes, the last call to GetBytes should specify
 151         // a value of true for the flush parameter.
 152         //
 153         // An exception occurs if the byte array is not large enough to hold the
 154         // complete encoding of the characters. The GetByteCount method can
 155         // be used to determine the exact number of bytes that will be produced for
 156         // a given range of characters. Alternatively, the GetMaxByteCount
 157         // method of the Encoding that produced this encoder can be used to
 158         // determine the maximum number of bytes that will be produced for a given
 159         // number of characters, regardless of the actual character values.
 160         //
 161         public abstract int GetBytes(char[] chars, int charIndex, int charCount,
 162                                         byte[] bytes, int byteIndex, bool flush);
 163
 164         // We expect this to be the workhorse for NLS Encodings, but for existing
 165         // ones we need a working (if slow) default implementation)
 166         //
 167         // WARNING WARNING WARNING
 168         //
 169         // WARNING: If this breaks it could be a security threat.  Obviously we
 170         // call this internally, so you need to make sure that your pointers, counts
 171         // and indexes are correct when you call this method.
 172         //
 173         // In addition, we have internal code, which will be marked as "safe" calling
 174         // this code.  However this code is dependent upon the implementation of an
 175         // external GetBytes() method, which could be overridden by a third party and
 176         // the results of which cannot be guaranteed.  We use that result to copy
 177         // the byte[] to our byte* output buffer.  If the result count was wrong, we
 178         // could easily overflow our output buffer.  Therefore we do an extra test
 179         // when we copy the buffer so that we don't overflow byteCount either.
 180         [CLSCompliant(false)]
 181         public virtual unsafe int GetBytes(char* chars, int charCount,
 182                                               byte* bytes, int byteCount, bool flush)
 183         {
 184             // Validate input parameters
 185             if (bytes == null || chars == null)
 186                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
 187                     SR.ArgumentNull_Array);
 188
 189             if (charCount < 0 || byteCount < 0)
 190                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
 191                     SR.ArgumentOutOfRange_NeedNonNegNum);
 192             Contract.EndContractBlock();
 193
 194             // Get the char array to convert
 195             char[] arrChar = new char[charCount];
 196
 197             int index;
 198             for (index = 0; index < charCount; index++)
 199                 arrChar[index] = chars[index];
 200
 201             // Get the byte array to fill
 202             byte[] arrByte = new byte[byteCount];
 203
 204             // Do the work
 205             int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);
 206
 207             Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");
 208
 209             // Copy the byte array
 210             // WARNING: We MUST make sure that we don't copy too many bytes.  We can't
 211             // rely on result because it could be a 3rd party implementation.  We need
 212             // to make sure we never copy more than byteCount bytes no matter the value
 213             // of result
 214             if (result < byteCount)
 215                 byteCount = result;
 216
 217             // Don't copy too many bytes!
 218             for (index = 0; index < byteCount; index++)
 219                 bytes[index] = arrByte[index];
 220
 221             return byteCount;
 222         }
 223
 224         // This method is used to avoid running out of output buffer space.
 225         // It will encode until it runs out of chars, and then it will return
 226         // true if it the entire input was converted.  In either case it
 227         // will also return the number of converted chars and output bytes used.
 228         // It will only throw a buffer overflow exception if the entire lenght of bytes[] is
 229         // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
 230         // We're done processing this buffer only if completed returns true.
 231         //
 232         // Might consider checking Max...Count to avoid the extra counting step.
 233         //
 234         // Note that if all of the input chars are not consumed, then we'll do a /2, which means
 235         // that its likely that we didn't consume as many chars as we could have.  For some
 236         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 237         public virtual void Convert(char[] chars, int charIndex, int charCount,
 238                                       byte[] bytes, int byteIndex, int byteCount, bool flush,
 239                                       out int charsUsed, out int bytesUsed, out bool completed)
 240         {
 241             // Validate parameters
 242             if (chars == null || bytes == null)
 243                 throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
 244                       SR.ArgumentNull_Array);
 245
 246             if (charIndex < 0 || charCount < 0)
 247                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
 248                       SR.ArgumentOutOfRange_NeedNonNegNum);
 249
 250             if (byteIndex < 0 || byteCount < 0)
 251                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
 252                       SR.ArgumentOutOfRange_NeedNonNegNum);
 253
 254             if (chars.Length - charIndex < charCount)
 255                 throw new ArgumentOutOfRangeException(nameof(chars),
 256                       SR.ArgumentOutOfRange_IndexCountBuffer);
 257
 258             if (bytes.Length - byteIndex < byteCount)
 259                 throw new ArgumentOutOfRangeException(nameof(bytes),
 260                       SR.ArgumentOutOfRange_IndexCountBuffer);
 261             Contract.EndContractBlock();
 262
 263             charsUsed = charCount;
 264
 265             // Its easy to do if it won't overrun our buffer.
 266             // Note: We don't want to call unsafe version because that might be an untrusted version
 267             // which could be really unsafe and we don't want to mix it up.
 268             while (charsUsed > 0)
 269             {
 270                 if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
 271                 {
 272                     bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);
 273                     completed = (charsUsed == charCount &&
 274                         (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
 275                     return;
 276                 }
 277
 278                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 279                 flush = false;
 280                 charsUsed /= 2;
 281             }
 282
 283             // Oops, we didn't have anything, we'll have to throw an overflow
 284             throw new ArgumentException(SR.Argument_ConversionOverflow);
 285         }
 286
 287         // Same thing, but using pointers
 288         //
 289         // Might consider checking Max...Count to avoid the extra counting step.
 290         //
 291         // Note that if all of the input chars are not consumed, then we'll do a /2, which means
 292         // that its likely that we didn't consume as many chars as we could have.  For some
 293         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 294         [CLSCompliant(false)]
 295         public virtual unsafe void Convert(char* chars, int charCount,
 296                                              byte* bytes, int byteCount, bool flush,
 297                                              out int charsUsed, out int bytesUsed, out bool completed)
 298         {
 299             // Validate input parameters
 300             if (bytes == null || chars == null)
 301                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
 302                     SR.ArgumentNull_Array);
 303             if (charCount < 0 || byteCount < 0)
 304                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
 305                     SR.ArgumentOutOfRange_NeedNonNegNum);
 306             Contract.EndContractBlock();
 307
 308             // Get ready to do it
 309             charsUsed = charCount;
 310
 311             // Its easy to do if it won't overrun our buffer.
 312             while (charsUsed > 0)
 313             {
 314                 if (GetByteCount(chars, charsUsed, flush) <= byteCount)
 315                 {
 316                     bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);
 317                     completed = (charsUsed == charCount &&
 318                         (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
 319                     return;
 320                 }
 321
 322                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 323                 flush = false;
 324                 charsUsed /= 2;
 325             }
 326
 327             // Oops, we didn't have anything, we'll have to throw an overflow
 328             throw new ArgumentException(SR.Argument_ConversionOverflow);
 329         }
 330     }
 331 }
 332