currentUInt32 = Unsafe.ReadUnaligned<ushort>(pBuffer);
if (!AllBytesInUInt32AreAscii(currentUInt32))
{
+ if (!BitConverter.IsLittleEndian)
+ {
+ currentUInt32 = currentUInt32 << 16;
+ }
goto FoundNonAsciiData;
}
asciiData = Unsafe.ReadUnaligned<ushort>(pAsciiBuffer + currentOffset);
if (!AllBytesInUInt32AreAscii(asciiData))
{
+ if (!BitConverter.IsLittleEndian)
+ {
+ asciiData = asciiData << 16;
+ }
goto FoundNonAsciiData;
}
// Drain ASCII bytes one at a time.
- while (((byte)asciiData & 0x80) == 0)
+ if (BitConverter.IsLittleEndian)
{
- pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
- currentOffset++;
- asciiData >>= 8;
+ while (((byte)asciiData & 0x80) == 0)
+ {
+ pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
+ currentOffset++;
+ asciiData >>= 8;
+ }
+ }
+ else
+ {
+ while ((asciiData & 0x80000000) == 0)
+ {
+ asciiData = BitOperations.RotateLeft(asciiData, 8);
+ pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
+ currentOffset++;
+ }
}
goto Finish;
tempB |= tempA;
uint tempC = (value << 2) & 0x0000_0F00u; // = [ 00000000 00000000 0000yyyy 00000000 ]
- uint tempD = (value >> 6) & 0x0003_0000u; // = [ 00000000 00000000 00yy0000 00000000 ]
+ uint tempD = (value >> 4) & 0x0000_3000u; // = [ 00000000 00000000 00yy0000 00000000 ]
tempD |= tempC;
uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ]
// want to return [ ######## ######## 110yyyyy 10xxxxxx ]
uint temp = (value >> 16) & 0x3Fu; // [ 00000000 00000000 00000000 00xxxxxx ]
- value = (value >> 22) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
+ value = (value >> 14) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
return value + temp + 0xC080u;
}
}
// Return statement is written this way to work around https://github.com/dotnet/runtime/issues/4207.
return (BitConverter.IsLittleEndian && (((value - 0x8080_80F0u) & 0xC0C0_C0F8u) == 0))
- || (!BitConverter.IsLittleEndian && (((value - 0xF080_8000u) & 0xF8C0_C0C0u) == 0));
+ || (!BitConverter.IsLittleEndian && (((value - 0xF080_8080u) & 0xF8C0_C0C0u) == 0));
}
/// <summary>
}
else
{
- pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ]
+ pOutputBuffer[0] = (byte)(thisDWord >> 16); // extract [ 00 AA ## ## ]
}
pInputBuffer++;
if (!BitConverter.IsLittleEndian)
{
// we know that we are writing a primitive type, so just do a simple swap
- Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
if (!BitConverter.IsLittleEndian)
{
// we know that we are reading a primitive type, so just do a simple swap
- Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
<Reference Include="System.Collections" />
<Reference Include="System.Diagnostics.Debug" />
<Reference Include="System.Diagnostics.Tools" />
+ <Reference Include="System.Memory" />
<Reference Include="System.Resources.ResourceManager" />
<Reference Include="System.Runtime" />
<Reference Include="System.Runtime.Extensions" />
<Reference Include="System.Runtime.InteropServices" />
<Reference Include="System.Threading" />
</ItemGroup>
+ <ItemGroup Condition="!$(TargetFramework.StartsWith('$(NetCoreAppCurrent)'))">
+ <PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
+ </ItemGroup>
</Project>
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
+using System.Buffers.Binary;
using System.Reflection;
using System.IO;
using System.Diagnostics;
internal short unused1; // Add an unused WORD so that CodePages is aligned with DWORD boundary.
}
private const int CODEPAGE_DATA_FILE_HEADER_SIZE = 44;
/// <summary>
/// Fills <paramref name="codePageDataFileHeader"/> from <paramref name="stream"/> and, on
/// big-endian hosts, reverses the byte order of the header fields in place so they can be
/// read through a <see cref="CodePageDataFileHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the header bytes from, starting at its current position.</param>
/// <param name="codePageDataFileHeader">Destination buffer; its full length is requested from the stream.</param>
internal static unsafe void ReadCodePageDataFileHeader(Stream stream, byte[] codePageDataFileHeader)
{
    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // buffer is full or the stream reports end-of-data (a zero-byte read).
    int totalRead = 0;
    while (totalRead < codePageDataFileHeader.Length)
    {
        int bytesRead = stream.Read(codePageDataFileHeader, totalRead, codePageDataFileHeader.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageDataFileHeader[0])
        {
            CodePageDataFileHeader* p = (CodePageDataFileHeader*)pBytes;

            // TableName is addressed as the first of 16 consecutive UTF-16 code units.
            char* pTableName = &p->TableName;
            for (int i = 0; i < 16; i++)
            {
                pTableName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pTableName[i]);
            }

            // Version is addressed as the first of 4 consecutive 16-bit values.
            ushort* pVersion = &p->Version;
            for (int i = 0; i < 4; i++)
            {
                pVersion[i] = BinaryPrimitives.ReverseEndianness(pVersion[i]);
            }

            p->CodePageCount = BinaryPrimitives.ReverseEndianness(p->CodePageCount);
        }
    }
}
[StructLayout(LayoutKind.Explicit, Pack = 2)]
internal unsafe struct CodePageIndex
[FieldOffset(0x24)]
internal int Offset; // DWORD
}
/// <summary>
/// Fills <paramref name="codePageIndex"/> from <paramref name="stream"/> and, on big-endian
/// hosts, reverses the byte order of the index fields in place so they can be read through a
/// <see cref="CodePageIndex"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the index entry from, starting at its current position.</param>
/// <param name="codePageIndex">Destination buffer; its full length is requested from the stream.</param>
internal static unsafe void ReadCodePageIndex(Stream stream, byte[] codePageIndex)
{
    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // buffer is full or the stream reports end-of-data (a zero-byte read).
    int totalRead = 0;
    while (totalRead < codePageIndex.Length)
    {
        int bytesRead = stream.Read(codePageIndex, totalRead, codePageIndex.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageIndex[0])
        {
            CodePageIndex* p = (CodePageIndex*)pBytes;

            // CodePageName is addressed as the first of 16 consecutive UTF-16 code units.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);
            p->Offset = BinaryPrimitives.ReverseEndianness(p->Offset);
        }
    }
}
[StructLayout(LayoutKind.Explicit)]
internal unsafe struct CodePageHeader
internal ushort ByteReplace; // WORD // default replacement bytes
}
private const int CODEPAGE_HEADER_SIZE = 48;
/// <summary>
/// Fills <paramref name="codePageHeader"/> from <paramref name="stream"/> and, on big-endian
/// hosts, reverses the byte order of the header fields in place so they can be read through a
/// <see cref="CodePageHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the code page header from, starting at its current position.</param>
/// <param name="codePageHeader">Destination buffer; its full length is requested from the stream.</param>
internal static unsafe void ReadCodePageHeader(Stream stream, byte[] codePageHeader)
{
    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // buffer is full or the stream reports end-of-data (a zero-byte read).
    int totalRead = 0;
    while (totalRead < codePageHeader.Length)
    {
        int bytesRead = stream.Read(codePageHeader, totalRead, codePageHeader.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageHeader[0])
        {
            CodePageHeader* p = (CodePageHeader*)pBytes;

            // CodePageName is addressed as the first of 16 consecutive UTF-16 code units.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->VersionMajor = BinaryPrimitives.ReverseEndianness(p->VersionMajor);
            p->VersionMinor = BinaryPrimitives.ReverseEndianness(p->VersionMinor);
            p->VersionRevision = BinaryPrimitives.ReverseEndianness(p->VersionRevision);
            p->VersionBuild = BinaryPrimitives.ReverseEndianness(p->VersionBuild);
            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);
            p->UnicodeReplace = (char)BinaryPrimitives.ReverseEndianness((ushort)p->UnicodeReplace);
            p->ByteReplace = BinaryPrimitives.ReverseEndianness(p->ByteReplace);
        }
    }
}
// Initialize our global stuff
private static readonly byte[] s_codePagesDataHeader = new byte[CODEPAGE_DATA_FILE_HEADER_SIZE];
}
// Read the header
- stream.Read(s_codePagesDataHeader, 0, s_codePagesDataHeader.Length);
+ ReadCodePageDataFileHeader(stream, s_codePagesDataHeader);
return stream;
}
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
- s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
+ ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);
if (pCodePageIndex->CodePage == codePage)
{
// Found it!
long position = s_codePagesEncodingDataStream.Position;
s_codePagesEncodingDataStream.Seek((long)pCodePageIndex->Offset, SeekOrigin.Begin);
- s_codePagesEncodingDataStream.Read(m_codePageHeader, 0, m_codePageHeader!.Length);
+ ReadCodePageHeader(s_codePagesEncodingDataStream, m_codePageHeader);
m_firstDataWordOffset = (int)s_codePagesEncodingDataStream.Position; // stream now pointing to the codepage data
if (i == codePagesCount - 1) // last codepage
// Read Next codepage data to get the offset and then calculate the size
s_codePagesEncodingDataStream.Seek(position, SeekOrigin.Begin);
int currentOffset = pCodePageIndex->Offset;
- s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
+ ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);
m_dataSize = pCodePageIndex->Offset - currentOffset - m_codePageHeader.Length;
}
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
- s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
+ ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);
if (pCodePageIndex->CodePage == codePage)
{
// The .NET Foundation licenses this file to you under the MIT license.
using System.IO;
+using System.Buffers.Binary;
using System.Runtime.Serialization;
using System.Runtime.CompilerServices;
{
internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializable
{
/// <summary>
/// Fills <paramref name="codePageIndex"/> from <paramref name="stream"/> and, on big-endian
/// hosts, reverses the byte order of the index fields in place so they can be read through a
/// <see cref="CodePageIndex"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the index entry from, starting at its current position.</param>
/// <param name="codePageIndex">Destination span; its full length is requested from the stream.</param>
internal static unsafe void ReadCodePageIndex(Stream stream, Span<byte> codePageIndex)
{
    // Stream.Read may return fewer bytes than requested, so keep reading into the
    // unfilled tail until it is empty or the stream reports end-of-data (a zero-byte read).
    Span<byte> remaining = codePageIndex;
    while (!remaining.IsEmpty)
    {
        int bytesRead = stream.Read(remaining);
        if (bytesRead == 0)
        {
            break;
        }

        remaining = remaining.Slice(bytesRead);
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageIndex[0])
        {
            CodePageIndex* p = (CodePageIndex*)pBytes;

            // CodePageName is addressed as the first of 16 consecutive UTF-16 code units.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);
            p->Offset = BinaryPrimitives.ReverseEndianness(p->Offset);
        }
    }
}
+
internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider)
{
lock (s_streamLock)
for (int i = 0; i < codePagesCount; i++)
{
- s_codePagesEncodingDataStream.Read(pCodePageIndex);
+ ReadCodePageIndex(s_codePagesEncodingDataStream, pCodePageIndex);
string codePageName;
switch (codePageIndex.CodePage)
// The .NET Foundation licenses this file to you under the MIT license.
using System;
+using System.Buffers.Binary;
using System.IO;
using System.Diagnostics;
using System.Text;
{
}
/// <summary>
/// Reads the UTF-16 code unit at <paramref name="pChar"/>, reversing its two bytes when the
/// host is big-endian and returning it unchanged otherwise.
/// </summary>
/// <param name="pChar">Pointer to the code unit to read.</param>
/// <returns>The code unit, byte-swapped on big-endian hosts.</returns>
internal static unsafe char ReadChar(char* pChar)
{
    char raw = *pChar;
    return BitConverter.IsLittleEndian
        ? raw
        : (char)BinaryPrimitives.ReverseEndianness((ushort)raw);
}
+
// MBCS data section:
//
// We treat each multibyte pattern as 2 bytes in our table. If it's a single byte, then the high byte
while (bytePosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- bytePosition = (int)(*pData);
+ bytePosition = (int)ReadChar(pData);
pData++;
continue;
}
while (bytesPosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- bytesPosition = (int)(*pData);
+ bytesPosition = (int)ReadChar(pData);
pData++;
}
else if (input < 0x20 && input > 0)
// Now pData should be pointing to first word of bytes -> unicode best fit table
// (which we're also not using at the moment)
int iBestFitCount = 0;
- bytesPosition = *pData;
+ bytesPosition = ReadChar(pData);
pData++;
while (bytesPosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- bytesPosition = (int)(*pData);
+ bytesPosition = (int)ReadChar(pData);
pData++;
}
else if (input < 0x20 && input > 0)
// Now we know how many best fits we have, so go back & read them in
iBestFitCount = 0;
pData = pBytes2Unicode;
- bytesPosition = *pData;
+ bytesPosition = ReadChar(pData);
pData++;
bool bOutOfOrder = false;
while (bytesPosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- bytesPosition = (int)(*pData);
+ bytesPosition = (int)ReadChar(pData);
pData++;
}
else if (input < 0x20 && input > 0)
// Now we're at the beginning of the Unicode -> Bytes best fit table, need to count them
char* pUnicode2Bytes = pData;
- int unicodePosition = *(pData++);
+ int unicodePosition = ReadChar(pData++);
iBestFitCount = 0;
while (unicodePosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- unicodePosition = (int)*pData;
+ unicodePosition = (int)ReadChar(pData);
pData++;
}
else if (input < 0x20 && input > 0)
// Now do it again to fill the array with real values
pData = pUnicode2Bytes;
- unicodePosition = *(pData++);
+ unicodePosition = ReadChar(pData++);
iBestFitCount = 0;
while (unicodePosition < 0x10000)
{
// Get the next byte
- char input = *pData;
+ char input = ReadChar(pData);
pData++;
// build our table:
if (input == 1)
{
// Use next data as our byte position
- unicodePosition = (int)*pData;
+ unicodePosition = (int)ReadChar(pData);
pData++;
}
else if (input < 0x20 && input > 0)
// The .NET Foundation licenses this file to you under the MIT license.
using System;
+using System.Buffers.Binary;
using System.IO;
using System.Diagnostics;
using System.Text;
{
}
/// <summary>
/// Reads the two bytes at <paramref name="pByte"/> as a little-endian 16-bit unsigned value,
/// independent of the host's byte order.
/// </summary>
/// <param name="pByte">Pointer to the first of the two bytes to read; need not be 2-byte aligned.</param>
/// <returns>The value <c>pByte[0] | (pByte[1] &lt;&lt; 8)</c>.</returns>
internal static unsafe ushort ReadUInt16(byte* pByte)
{
    // BinaryPrimitives performs an unaligned read and applies the byte swap on big-endian
    // hosts, so no ushort* cast (which assumes alignment) or manual endianness branch is needed.
    return BinaryPrimitives.ReadUInt16LittleEndian(new ReadOnlySpan<byte>(pByte, sizeof(ushort)));
}
+
// We have a managed code page entry, so load our tables
// SBCS data section looks like:
//
fixed (byte* pBuffer = &buffer[0])
{
- char* pTemp = (char*)pBuffer;
for (int b = 0; b < 256; b++)
{
+ char c = (char)ReadUInt16(pBuffer + 2 * b);
// Don't want to force 0's to map Unicode wrong. 0 byte == 0 unicode already taken care of
- if (pTemp[b] != 0 || b == 0)
+ if (c != 0 || b == 0)
{
- mapBytesToUnicode[b] = pTemp[b];
+ mapBytesToUnicode[b] = c;
- if (pTemp[b] != UNKNOWN_CHAR)
- mapUnicodeToBytes[pTemp[b]] = (byte)b;
+ if (c != UNKNOWN_CHAR)
+ mapUnicodeToBytes[c] = (byte)b;
}
else
{
// See if our words are zero
ushort byteTemp;
- while ((byteTemp = *((ushort*)pData)) != 0)
+ while ((byteTemp = ReadUInt16(pData)) != 0)
{
Debug.Assert(arrayTemp[byteTemp] == UNKNOWN_CHAR, $"[SBCSCodePageEncoding::ReadBestFitTable] Expected unallocated byte (not 0x{(int)arrayTemp[byteTemp]:X2}) for best fit byte at 0x{byteTemp:X2} for code page {CodePage}");
pData += 2;
- arrayTemp[byteTemp] = *((char*)pData);
+ arrayTemp[byteTemp] = (char)ReadUInt16(pData);
pData += 2;
}
// Now do the UnicodeToBytes Best Fit mapping (this is the one we normally think of when we say "best fit")
// pData should be pointing at the first data point for Bytes->Unicode table
- int unicodePosition = *((ushort*)pData);
+ int unicodePosition = ReadUInt16(pData);
pData += 2;
while (unicodePosition < 0x10000)
if (input == 1)
{
// Use next 2 bytes as our byte position
- unicodePosition = *((ushort*)pData);
+ unicodePosition = ReadUInt16(pData);
pData += 2;
}
else if (input < 0x20 && input > 0 && input != 0x1e)
// Now actually read in the data
// reset pData should be pointing at the first data point for Bytes->Unicode table
pData = pUnicodeToSBCS;
- unicodePosition = *((ushort*)pData);
+ unicodePosition = ReadUInt16(pData);
pData += 2;
iBestFitCount = 0;
if (input == 1)
{
// Use next 2 bytes as our byte position
- unicodePosition = *((ushort*)pData);
+ unicodePosition = ReadUInt16(pData);
pData += 2;
}
else if (input < 0x20 && input > 0 && input != 0x1e)