1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Runtime.Serialization;
8 using System.Diagnostics;
9 using System.Diagnostics.Contracts;
13 // An Encoder is used to encode a sequence of blocks of characters into
14 // a sequence of blocks of bytes. Following instantiation of an encoder,
15 // sequential blocks of characters are converted into blocks of bytes through
16 // calls to the GetBytes method. The encoder maintains state between the
17 // conversions, allowing it to correctly encode character sequences that span adjacent blocks.
20 // Instances of specific implementations of the Encoder abstract base
21 // class are typically obtained through calls to the GetEncoder method
22 // of Encoding objects.
24 public abstract class Encoder
// Fallback object for this encoder. SerializeEncoder stores it under the "m_fallback" key, and the
// FallbackBuffer getter uses it (when non-null) to create the fallback buffer.
26 internal EncoderFallback m_fallback = null;
// Fallback buffer for this encoder. It starts out null, is created on first use by the
// FallbackBuffer getter, and is set back to null by the Fallback setter.
// NOTE(review): the listing skips a short line just above this field (possibly an attribute) - confirm
// against the complete file.
29 internal EncoderFallbackBuffer m_fallbackBuffer = null;
// Writes this encoder's fallback object into the supplied SerializationInfo
// under the "m_fallback" key. Nothing else is recorded here.
internal void SerializeEncoder(SerializationInfo info) =>
    info.AddValue("m_fallback", m_fallback);
38 // We don't call default reset because default reset probably isn't good if we aren't initialized.
// The EncoderFallback associated with this encoder.
// The visible statements reject an invalid new value and then drop the current fallback buffer.
// Throws ArgumentNullException (for value) and ArgumentException (SR.Argument_FallbackBufferNotEmpty)
// when the existing fallback buffer still reports Remaining > 0.
// NOTE(review): several short lines of this property are not visible in this listing - the accessor
// keywords, the test guarding the ArgumentNullException (presumably value == null) and presumably the
// assignment of value to m_fallback. Confirm against the complete file.
41 public EncoderFallback Fallback
51 throw new ArgumentNullException(nameof(value));
52 Contract.EndContractBlock();
54 // Can't change the fallback while the current fallback buffer still has data remaining
55 if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
56 throw new ArgumentException(
57 SR.Argument_FallbackBufferNotEmpty, nameof(value));
// Drop the old buffer; the FallbackBuffer getter creates a new one on demand.
60 m_fallbackBuffer = null;
64 // Note: we don't test for threading here because async access to Encoders and Decoders
65 // doesn't work anyway.
// Returns the fallback buffer for this encoder, creating and caching it in m_fallbackBuffer
// the first time it is requested.
// NOTE(review): the accessor keyword and the keyword between the two assignments below are not
// visible in this listing. Presumably the second assignment is the else branch, i.e. the
// replacement fallback is used only when m_fallback is null - confirm against the complete file.
66 public EncoderFallbackBuffer FallbackBuffer
// Only create a buffer when none has been cached yet.
70 if (m_fallbackBuffer == null)
// Prefer the fallback configured on this encoder.
72 if (m_fallback != null)
73 m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
// Buffer obtained from the shared EncoderFallback.ReplacementFallback instance.
75 m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer();
78 return m_fallbackBuffer;
// Reports whether a fallback buffer object currently exists for this encoder.
// Unlike FallbackBuffer, reading this property never creates one.
internal bool InternalHasFallbackBuffer => m_fallbackBuffer != null;
92 // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder. This
93 // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
95 // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
97 // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
98 // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
public virtual void Reset()
{
    // Encode an empty input with flush == true so that the derived encoder
    // discards whatever state it is still holding.
    char[] noChars = new char[0];

    // Ask the encoder how many bytes that flush produces and size the scratch
    // output accordingly.
    int flushedByteCount = GetByteCount(noChars, 0, 0, true);
    byte[] scratch = new byte[flushedByteCount];
    GetBytes(noChars, 0, 0, scratch, 0, true);

    // Clear the fallback buffer too, but only if one has been created.
    m_fallbackBuffer?.Reset();
}
108 // Returns the number of bytes the next call to GetBytes will
109 // produce if presented with the given range of characters and the given
110 // value of the flush parameter. The returned value takes into
111 // account the state in which the encoder was left following the last call
112 // to GetBytes. The state of the encoder is not affected by a call to GetByteCount.
// Array-based overload that every concrete encoder must implement.
//   chars, index, count - the range of characters to measure
//   flush               - the flush value the matching GetBytes call would be given
115 public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
117 // We expect this to be the workhorse for NLS encodings
118 // unfortunately for existing overrides, it has to call the [] version,
119 // which is really slow, so avoid this method if you might be calling external encodings.
// Pointer-based overload. The default implementation copies count chars from the pointer into a
// temporary char[] and returns the result of the array-based GetByteCount for that copy.
// Throws ArgumentNullException (for chars) and ArgumentOutOfRangeException (for count).
// NOTE(review): the conditions guarding the two throws and the declaration of "index" are not
// visible in this listing - presumably chars == null, count < 0 and a local int. Confirm against
// the complete file.
120 [CLSCompliant(false)]
121 public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
123 // Validate input parameters
125 throw new ArgumentNullException(nameof(chars),
126 SR.ArgumentNull_Array);
129 throw new ArgumentOutOfRangeException(nameof(count),
130 SR.ArgumentOutOfRange_NeedNonNegNum);
131 Contract.EndContractBlock();
// Copy the input into a managed array so the array-based overload can be called.
133 char[] arrChar = new char[count];
136 for (index = 0; index < count; index++)
137 arrChar[index] = chars[index];
139 return GetByteCount(arrChar, 0, count, flush);
142 // Encodes a range of characters in a character array into a range of bytes
143 // in a byte array. The method encodes charCount characters from
144 // chars starting at index charIndex, storing the resulting
145 // bytes in bytes starting at index byteIndex. The encoding
146 // takes into account the state in which the encoder was left following the
147 // last call to this method. The flush parameter indicates whether
148 // the encoder should flush any shift-states and partial characters at the
149 // end of the conversion. To ensure correct termination of a sequence of
150 // blocks of encoded bytes, the last call to GetBytes should specify
151 // a value of true for the flush parameter.
153 // An exception occurs if the byte array is not large enough to hold the
154 // complete encoding of the characters. The GetByteCount method can
155 // be used to determine the exact number of bytes that will be produced for
156 // a given range of characters. Alternatively, the GetMaxByteCount
157 // method of the Encoding that produced this encoder can be used to
158 // determine the maximum number of bytes that will be produced for a given
159 // number of characters, regardless of the actual character values.
// Array-based overload that every concrete encoder must implement.
//   chars, charIndex, charCount - the range of characters to encode
//   bytes, byteIndex            - destination array and the index at which output starts
//   flush                       - whether shift-states and partial characters are flushed at the end
// NOTE(review): the int result is presumably the number of bytes written (the pointer overload
// treats it as a byte count) - confirm against the API documentation.
161 public abstract int GetBytes(char[] chars, int charIndex, int charCount,
162 byte[] bytes, int byteIndex, bool flush);
164 // We expect this to be the workhorse for NLS Encodings, but for existing
165 // ones we need a working (if slow) default implementation.
167 // WARNING WARNING WARNING
169 // WARNING: If this breaks it could be a security threat. Obviously we
170 // call this internally, so you need to make sure that your pointers, counts
171 // and indexes are correct when you call this method.
173 // In addition, we have internal code, which will be marked as "safe" calling
174 // this code. However this code is dependent upon the implementation of an
175 // external GetBytes() method, which could be overridden by a third party and
176 // the results of which cannot be guaranteed. We use that result to copy
177 // the byte[] to our byte* output buffer. If the result count was wrong, we
178 // could easily overflow our output buffer. Therefore we do an extra test
179 // when we copy the buffer so that we don't overflow byteCount either.
// Pointer-based overload built on the array-based GetBytes: copies charCount chars from chars into
// a temporary char[], encodes them into a temporary byte[] of byteCount bytes, then copies the
// encoded bytes out through the bytes pointer.
// Throws ArgumentNullException when bytes or chars is null, and ArgumentOutOfRangeException when
// charCount or byteCount is negative.
// NOTE(review): the declaration of "index", the statement controlled by "if (result < byteCount)"
// and the return statement are not visible in this listing. Presumably byteCount is lowered to
// result and then returned - confirm against the complete file.
180 [CLSCompliant(false)]
181 public virtual unsafe int GetBytes(char* chars, int charCount,
182 byte* bytes, int byteCount, bool flush)
184 // Validate input parameters
185 if (bytes == null || chars == null)
186 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
187 SR.ArgumentNull_Array);
189 if (charCount < 0 || byteCount < 0)
190 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
191 SR.ArgumentOutOfRange_NeedNonNegNum);
192 Contract.EndContractBlock();
194 // Get the char array to convert
195 char[] arrChar = new char[charCount];
198 for (index = 0; index < charCount; index++)
199 arrChar[index] = chars[index];
201 // Get the byte array to fill
202 byte[] arrByte = new byte[byteCount];
// Encode via the array-based overload, which may be a third-party override.
205 int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);
// Debug-only check; the copy loop below is bounded by byteCount regardless of this assert.
207 Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");
209 // Copy the byte array
210 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
211 // rely on result because it could be a 3rd party implementation. We need
212 // to make sure we never copy more than byteCount bytes no matter the value of result.
214 if (result < byteCount)
217 // Don't copy too many bytes!
218 for (index = 0; index < byteCount; index++)
219 bytes[index] = arrByte[index];
224 // This method is used to avoid running out of output buffer space.
225 // It will encode until it runs out of chars, and then it will return
226 // true if the entire input was converted. In either case it
227 // will also return the number of converted chars and output bytes used.
228 // It will only throw a buffer overflow exception if the entire length of bytes[] is
229 // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
230 // We're done processing this buffer only if completed returns true.
232 // Might consider checking Max...Count to avoid the extra counting step.
234 // Note that if not all of the input chars are consumed, then we'll do a /2, which means
235 // that it's likely that we didn't consume as many chars as we could have. For some
236 // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
// Encodes as much of the range chars[charIndex .. charIndex + charCount) as fits into byteCount
// bytes of the bytes array starting at byteIndex, reporting progress through the out parameters.
//   charsUsed - number of input chars passed to GetBytes by the attempt that fit
//   bytesUsed - value returned by GetBytes for that attempt
//   completed - true only when charsUsed equals charCount and the fallback buffer (if one exists)
//               has nothing remaining
// Throws ArgumentNullException / ArgumentOutOfRangeException for invalid arguments, and
// ArgumentException (SR.Argument_ConversionOverflow) when the loop finishes without an attempt that fit.
// NOTE(review): the statement that leaves the method after a successful attempt and the statements
// that prepare the next attempt are not visible in this listing. Per the comment inside the loop,
// the count is presumably halved and flush cleared - confirm against the complete file.
// NOTE(review): with charCount == 0 the loop body never runs, so control appears to reach the
// overflow throw - confirm that this is intended.
237 public virtual void Convert(char[] chars, int charIndex, int charCount,
238 byte[] bytes, int byteIndex, int byteCount, bool flush,
239 out int charsUsed, out int bytesUsed, out bool completed)
241 // Validate parameters
242 if (chars == null || bytes == null)
243 throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
244 SR.ArgumentNull_Array);
246 if (charIndex < 0 || charCount < 0)
247 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
248 SR.ArgumentOutOfRange_NeedNonNegNum);
250 if (byteIndex < 0 || byteCount < 0)
251 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
252 SR.ArgumentOutOfRange_NeedNonNegNum);
// The requested ranges must lie inside the arrays.
254 if (chars.Length - charIndex < charCount)
255 throw new ArgumentOutOfRangeException(nameof(chars),
256 SR.ArgumentOutOfRange_IndexCountBuffer);
258 if (bytes.Length - byteIndex < byteCount)
259 throw new ArgumentOutOfRangeException(nameof(bytes),
260 SR.ArgumentOutOfRange_IndexCountBuffer);
261 Contract.EndContractBlock();
// Start by trying to convert the whole input.
263 charsUsed = charCount;
265 // It's easy to do if it won't overrun our buffer.
266 // Note: We don't want to call unsafe version because that might be an untrusted version
267 // which could be really unsafe and we don't want to mix it up.
268 while (charsUsed > 0)
// Count first, and only encode when the output fits in byteCount bytes.
270 if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
272 bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);
273 completed = (charsUsed == charCount &&
274 (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
278 // Try again with 1/2 the count, won't flush then 'cause won't read it all
283 // Oops, we didn't have anything, we'll have to throw an overflow
284 throw new ArgumentException(SR.Argument_ConversionOverflow);
287 // Same thing, but using pointers
289 // Might consider checking Max...Count to avoid the extra counting step.
291 // Note that if not all of the input chars are consumed, then we'll do a /2, which means
292 // that it's likely that we didn't consume as many chars as we could have. For some
293 // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
// Pointer-based counterpart of the array-based Convert: encodes as many of the charCount chars at
// chars as fit into byteCount bytes at bytes, reporting progress through the out parameters.
//   charsUsed - number of input chars passed to GetBytes by the attempt that fit
//   bytesUsed - value returned by GetBytes for that attempt
//   completed - true only when charsUsed equals charCount and the fallback buffer (if one exists)
//               has nothing remaining
// Throws ArgumentNullException when bytes or chars is null, ArgumentOutOfRangeException when
// charCount or byteCount is negative, and ArgumentException (SR.Argument_ConversionOverflow) when
// the loop finishes without an attempt that fit.
// NOTE(review): the statement that leaves the method after a successful attempt and the statements
// that prepare the next attempt are not visible in this listing. Per the comment inside the loop,
// the count is presumably halved and flush cleared - confirm against the complete file.
// NOTE(review): with charCount == 0 the loop body never runs, so control appears to reach the
// overflow throw - confirm that this is intended.
294 [CLSCompliant(false)]
295 public virtual unsafe void Convert(char* chars, int charCount,
296 byte* bytes, int byteCount, bool flush,
297 out int charsUsed, out int bytesUsed, out bool completed)
299 // Validate input parameters
300 if (bytes == null || chars == null)
301 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
302 SR.ArgumentNull_Array);
303 if (charCount < 0 || byteCount < 0)
304 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
305 SR.ArgumentOutOfRange_NeedNonNegNum);
306 Contract.EndContractBlock();
308 // Get ready to do it
309 charsUsed = charCount;
311 // It's easy to do if it won't overrun our buffer.
312 while (charsUsed > 0)
// Count first, and only encode when the output fits in byteCount bytes.
314 if (GetByteCount(chars, charsUsed, flush) <= byteCount)
316 bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);
317 completed = (charsUsed == charCount &&
318 (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
322 // Try again with 1/2 the count, won't flush then 'cause won't read it all
327 // Oops, we didn't have anything, we'll have to throw an overflow
328 throw new ArgumentException(SR.Argument_ConversionOverflow);