From: Levi Broderick Date: Tue, 6 Nov 2018 00:06:32 +0000 (-0800) Subject: Add support for BSWAP intrinsic (#18398) X-Git-Tag: accepted/tizen/unified/20190422.045933~812 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f72025c8b6d8a4fc3b4e22e2a3b6e1afeaef15ff;p=platform%2Fupstream%2Fcoreclr.git Add support for BSWAP intrinsic (#18398) With this change, the JIT will recognize a call to BinaryPrimitives.ReverseEndianness and will emit a bswap instruction. This logic is currently only hooked up for x86 and x64; ARM still uses fallback logic. If the JIT can't emit a bswap instruction (for example, trying to emit a 64-bit bswap in a 32-bit process), it will fall back to a software implementation, so the APIs will work across all architectures. --- diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs index ae3acdf..d76fbd8 100644 --- a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs +++ b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs @@ -30,21 +30,21 @@ namespace System.Buffers.Binary /// /// Reverses a primitive value - performs an endianness swap /// + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static short ReverseEndianness(short value) - { - return (short)((value & 0x00FF) << 8 | (value & 0xFF00) >> 8); - } + public static short ReverseEndianness(short value) => (short)ReverseEndianness((ushort)value); /// /// Reverses a primitive value - performs an endianness swap /// + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int ReverseEndianness(int value) => (int)ReverseEndianness((uint)value); /// /// Reverses a primitive value - performs an endianness swap /// + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long ReverseEndianness(long value) => (long)ReverseEndianness((ulong)value); @@ -54,7 +54,7 @@ namespace System.Buffers.Binary /// rather than having to skip byte fields. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static byte ReverseEndianness(byte value) + public static byte ReverseEndianness(byte value) { return value; } @@ -63,6 +63,7 @@ namespace System.Buffers.Binary /// Reverses a primitive value - performs an endianness swap /// [CLSCompliant(false)] + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ushort ReverseEndianness(ushort value) { @@ -80,6 +81,7 @@ namespace System.Buffers.Binary /// Reverses a primitive value - performs an endianness swap /// [CLSCompliant(false)] + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ReverseEndianness(uint value) { @@ -113,6 +115,7 @@ namespace System.Buffers.Binary /// Reverses a primitive value - performs an endianness swap /// [CLSCompliant(false)] + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ulong ReverseEndianness(ulong value) { diff --git a/src/jit/codegen.h b/src/jit/codegen.h index fa12670..8910ee6 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -1035,6 +1035,7 @@ protected: void genCodeForIndexAddr(GenTreeIndexAddr* tree); void genCodeForIndir(GenTreeIndir* tree); void genCodeForNegNot(GenTree* tree); + void genCodeForBswap(GenTree* tree); void genCodeForLclVar(GenTreeLclVar* tree); void genCodeForLclFld(GenTreeLclFld* tree); void genCodeForStoreLclFld(GenTreeLclFld* tree); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 54ff6dc..92c858e 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -520,6 +520,46 @@ void CodeGen::genCodeForNegNot(GenTree* tree) genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForBswap(GenTree* tree) +{ + // TODO: If we're swapping immediately after a read from memory or immediately before + // a write to memory, use the MOVBE instruction instead of the BSWAP instruction if + // the platform supports it. + + assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); + + regNumber targetReg = tree->gtRegNum; + var_types targetType = tree->TypeGet(); + + GenTree* operand = tree->gtGetOp1(); + assert(operand->isUsedFromReg()); + regNumber operandReg = genConsumeReg(operand); + + if (operandReg != targetReg) + { + inst_RV_RV(INS_mov, targetReg, operandReg, targetType); + } + + if (tree->OperIs(GT_BSWAP)) + { + // 32-bit and 64-bit byte swaps use "bswap reg" + inst_RV(INS_bswap, targetReg, targetType); + } + else + { + // 16-bit byte swaps use "ror reg.16, 8" + inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); + } + + genProduceReg(tree); +} + // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { @@ -1562,6 +1602,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForNegNot(treeNode); break; + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + case GT_DIV: if (varTypeIsFloating(treeNode->TypeGet())) { diff --git a/src/jit/compiler.h b/src/jit/compiler.h index eb6255d..8f8083a 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -9741,6 +9741,8 @@ public: // Standard unary operators case GT_NOT: case GT_NEG: + case GT_BSWAP: + case GT_BSWAP16: case GT_COPY: case GT_RELOAD: case GT_ARR_LENGTH: diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 20fda04..e798741 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -4312,6 +4312,8 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_STORE_LCL_FLD: case GT_NOT: case GT_NEG: + case GT_BSWAP: + case GT_BSWAP16: case GT_COPY: case GT_RELOAD: case GT_ARR_LENGTH: diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 5add863..f572ce1 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -11018,6 +11018,32 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) dst += emitOutputByte(dst, code); break; + case INS_bswap: + { + assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined + + // The Intel instruction set reference for BSWAP states that extended registers + // should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7), + // REX.B should instead be used if the register is encoded in the opcode byte itself. + // Therefore the default logic of insEncodeReg012 is correct for this case. + + code = insCodeRR(ins); + + if (TakesRexWPrefix(ins, size)) + { + code = AddRexWPrefix(ins, code); + } + + // Register... + unsigned regcode = insEncodeReg012(ins, reg, size, &code); + + // Output the REX prefix + dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); + + dst += emitOutputWord(dst, code | (regcode << 8)); + break; + } + case INS_seto: case INS_setno: case INS_setb: diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index c5cf957..70a5bf6 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -4748,6 +4748,8 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) case GT_NOP: case GT_RETURN: case GT_RETFILT: + case GT_BSWAP: + case GT_BSWAP16: if (def == this->AsUnOp()->gtOp1) { *use = &this->AsUnOp()->gtOp1; @@ -8348,6 +8350,8 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_NULLCHECK: case GT_PUTARG_REG: case GT_PUTARG_STK: + case GT_BSWAP: + case GT_BSWAP16: #if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: #endif // FEATURE_ARG_SPLIT @@ -12906,6 +12910,15 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) i1 = -i1; break; + case GT_BSWAP: + i1 = ((i1 >> 24) & 0xFF) | ((i1 >> 8) & 0xFF00) | ((i1 << 8) & 0xFF0000) | + ((i1 << 24) & 0xFF000000); + break; + + case GT_BSWAP16: + i1 = ((i1 >> 8) & 0xFF) | ((i1 << 8) & 0xFF00); + break; + case GT_CAST: // assert (genActualType(tree->CastToType()) == tree->gtType); switch (tree->CastToType()) @@ -13041,6 +13054,13 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) lval1 = -lval1; break; + case GT_BSWAP: + lval1 = ((lval1 >> 56) & 0xFF) | ((lval1 >> 40) & 0xFF00) | ((lval1 >> 24) & 0xFF0000) | + ((lval1 >> 8) & 0xFF000000) | ((lval1 << 8) & 0xFF00000000) | + ((lval1 << 24) & 0xFF0000000000) | ((lval1 << 40) & 0xFF000000000000) | + ((lval1 << 56) & 0xFF00000000000000); + break; + case GT_CAST: assert(genActualType(tree->CastToType()) == tree->gtType); switch (tree->CastToType()) diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index 9fd1275..7fe5b2f 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -101,6 +101,9 @@ GTNODE(INIT_VAL , GenTreeOp ,0,GTK_UNOP) // Initi GTNODE(RUNTIMELOOKUP , GenTreeRuntimeLookup, 0,GTK_UNOP|GTK_EXOP) // Runtime handle lookup +GTNODE(BSWAP , GenTreeOp ,0,GTK_UNOP) // Byte swap (32-bit or 64-bit) +GTNODE(BSWAP16 , GenTreeOp ,0,GTK_UNOP) // Byte swap (16-bit) + //----------------------------------------------------------------------------- // Binary operators (2 operands): //----------------------------------------------------------------------------- diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 5a91904..68071e4 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -3934,6 +3934,45 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + case NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness: + { + assert(sig->numArgs == 1); + + // We expect the return type of the ReverseEndianness routine to match the type of the + // one and only argument to the method. We use a special instruction for 16-bit + // BSWAPs since on x86 processors this is implemented as ROR <16-bit reg>, 8. Additionally, + // we only emit 64-bit BSWAP instructions on 64-bit archs; if we're asked to perform a + // 64-bit byte swap on a 32-bit arch, we'll fall to the default case in the switch block below. + + switch (sig->retType) + { + case CorInfoType::CORINFO_TYPE_SHORT: + case CorInfoType::CORINFO_TYPE_USHORT: + retNode = gtNewOperNode(GT_BSWAP16, callType, impPopStack().val); + break; + + case CorInfoType::CORINFO_TYPE_INT: + case CorInfoType::CORINFO_TYPE_UINT: +#ifdef _TARGET_64BIT_ + case CorInfoType::CORINFO_TYPE_LONG: + case CorInfoType::CORINFO_TYPE_ULONG: +#endif // _TARGET_64BIT_ + retNode = gtNewOperNode(GT_BSWAP, callType, impPopStack().val); + break; + + default: + // This default case gets hit on 32-bit archs when a call to a 64-bit overload + // of ReverseEndianness is encountered. In that case we'll let JIT treat this as a standard + // method call, where the implementation decomposes the operation into two 32-bit + // bswap routines. If the input to the 64-bit function is a constant, then we rely + // on inlining + constant folding of 32-bit bswaps to effectively constant fold + // the 64-bit call site. + break; + } + + break; + } + default: break; } @@ -4072,6 +4111,15 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = NI_Math_Round; } } +#if defined(_TARGET_XARCH_) // We currently only support BSWAP on x86 + else if (strcmp(namespaceName, "System.Buffers.Binary") == 0) + { + if ((strcmp(className, "BinaryPrimitives") == 0) && (strcmp(methodName, "ReverseEndianness") == 0)) + { + result = NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness; + } + } +#endif // !defined(_TARGET_XARCH_) else if (strcmp(namespaceName, "System.Collections.Generic") == 0) { if ((strcmp(className, "EqualityComparer`1") == 0) && (strcmp(methodName, "get_Default") == 0)) diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 84727f3..db02fc2 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -63,6 +63,10 @@ INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags) INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags) +// Multi-byte opcodes without modrm are represented in mixed endian fashion. +// See comment around quarter way through this file for more information. +INST5(bswap, "bswap", IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_FLAGS_None) + // id nm um mr mi rm a4 flags INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags) INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags) diff --git a/src/jit/namedintrinsiclist.h b/src/jit/namedintrinsiclist.h index 187ef3f..edf006f 100644 --- a/src/jit/namedintrinsiclist.h +++ b/src/jit/namedintrinsiclist.h @@ -9,11 +9,12 @@ enum NamedIntrinsic : unsigned short { - NI_Illegal = 0, - NI_System_Enum_HasFlag = 1, - NI_MathF_Round = 2, - NI_Math_Round = 3, - NI_System_Collections_Generic_EqualityComparer_get_Default = 4, + NI_Illegal = 0, + NI_System_Enum_HasFlag = 1, + NI_MathF_Round = 2, + NI_Math_Round = 3, + NI_System_Collections_Generic_EqualityComparer_get_Default = 4, + NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness = 5, #ifdef FEATURE_HW_INTRINSICS NI_HW_INTRINSIC_START, #if defined(_TARGET_XARCH_) diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index 4c37fc0..52157c6 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -2603,6 +2603,8 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) case GT_NEG: case GT_NOT: + case GT_BSWAP: + case GT_BSWAP16: case GT_CAST: return true; // CSE these Unary Operators diff --git a/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs new file mode 100644 index 0000000..b650fc8 --- /dev/null +++ b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs @@ -0,0 +1,136 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Buffers.Binary; +using Internal; + +namespace BinaryPrimitivesReverseEndianness +{ + class Program + { + public const int Pass = 100; + public const int Fail = 0; + + private const ushort ConstantUInt16Input = 0x9876; + private const ushort ConstantUInt16Expected = 0x7698; + + private const uint ConstantUInt32Input = 0x98765432; + private const uint ConstantUInt32Expected = 0x32547698; + + private const ulong ConstantUInt64Input = 0xfedcba9876543210; + private const ulong ConstantUInt64Expected = 0x1032547698badcfe; + + + static int Main(string[] args) + { + /* + * CONST VALUE TESTS + */ + + ushort swappedUInt16 = BinaryPrimitives.ReverseEndianness(ConstantUInt16Input); + if (swappedUInt16 != ConstantUInt16Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(const UInt16) failed."); + Console.WriteLine($"Input: 0x{ConstantUInt16Input:X4}"); + Console.WriteLine($"Output: 0x{swappedUInt16:X4}"); + Console.WriteLine($"Expected: 0x{ConstantUInt16Expected:X4}"); + } + + uint swappedUInt32 = BinaryPrimitives.ReverseEndianness(ConstantUInt32Input); + if (swappedUInt32 != ConstantUInt32Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(const UInt32) failed."); + Console.WriteLine($"Input: 0x{ConstantUInt32Input:X8}"); + Console.WriteLine($"Output: 0x{swappedUInt32:X8}"); + Console.WriteLine($"Expected: 0x{ConstantUInt32Expected:X8}"); + } + + ulong swappedUInt64 = BinaryPrimitives.ReverseEndianness(ConstantUInt64Input); + if (swappedUInt64 != ConstantUInt64Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(const UInt32) failed."); + Console.WriteLine($"Input: 0x{ConstantUInt64Input:X16}"); + Console.WriteLine($"Output: 0x{swappedUInt64:X16}"); + Console.WriteLine($"Expected: 0x{ConstantUInt64Expected:X16}"); + } + + /* + * NON-CONST VALUE TESTS + */ + + ushort nonConstUInt16Input = (ushort)DateTime.UtcNow.Ticks; + ushort nonConstUInt16Output = BinaryPrimitives.ReverseEndianness(nonConstUInt16Input); + ushort nonConstUInt16Expected = ByteSwapUInt16_Control(nonConstUInt16Input); + if (nonConstUInt16Output != nonConstUInt16Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(non-const UInt16) failed."); + Console.WriteLine($"Input: 0x{nonConstUInt16Input:X4}"); + Console.WriteLine($"Output: 0x{nonConstUInt16Output:X4}"); + Console.WriteLine($"Expected: 0x{nonConstUInt16Expected:X4}"); + } + + uint nonConstUInt32Input = (uint)DateTime.UtcNow.Ticks; + uint nonConstUInt32Output = BinaryPrimitives.ReverseEndianness(nonConstUInt32Input); + uint nonConstUInt32Expected = ByteSwapUInt32_Control(nonConstUInt32Input); + if (nonConstUInt32Output != nonConstUInt32Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(non-const UInt32) failed."); + Console.WriteLine($"Input: 0x{nonConstUInt32Input:X8}"); + Console.WriteLine($"Output: 0x{nonConstUInt32Output:X8}"); + Console.WriteLine($"Expected: 0x{nonConstUInt32Expected:X8}"); + } + + ulong nonConstUInt64Input = (ulong)DateTime.UtcNow.Ticks; + ulong nonConstUInt64Output = BinaryPrimitives.ReverseEndianness(nonConstUInt64Input); + ulong nonConstUInt64Expected = ByteSwapUInt64_Control(nonConstUInt64Input); + if (nonConstUInt64Output != nonConstUInt64Expected) + { + Console.WriteLine($"BinaryPrimitives.ReverseEndianness(non-const UInt64) failed."); + Console.WriteLine($"Input: 0x{nonConstUInt64Input:X16}"); + Console.WriteLine($"Output: 0x{nonConstUInt64Output:X16}"); + Console.WriteLine($"Expected: 0x{nonConstUInt64Expected:X16}"); + } + + return Pass; + } + + private static ushort ByteSwapUInt16_Control(ushort value) + { + return (ushort)ByteSwapUnsigned_General(value, sizeof(ushort)); + } + + private static uint ByteSwapUInt32_Control(uint value) + { + return (uint)ByteSwapUnsigned_General(value, sizeof(uint)); + } + + private static ulong ByteSwapUInt64_Control(ulong value) + { + return (ulong)ByteSwapUnsigned_General(value, sizeof(ulong)); + } + + private static ulong ByteSwapUnsigned_General(ulong value, int width) + { + // A naive byte swap routine that works on integers of any arbitrary width. + // Width is specified in bytes. + + ulong retVal = 0; + do + { + retVal = retVal << 8 | (byte)value; + value >>= 8; + } while (--width > 0); + + if (value != 0) + { + // All bits of value should've been shifted out at this point. + throw new Exception("Unexpected data width specified - error in test program?"); + } + + return retVal; + } + } +} diff --git a/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_r.csproj b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_r.csproj new file mode 100644 index 0000000..32ddef9 --- /dev/null +++ b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_r.csproj @@ -0,0 +1,35 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + true + None + + + + + + + + + + + diff --git a/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_ro.csproj b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_ro.csproj new file mode 100644 index 0000000..ca7ce21 --- /dev/null +++ b/tests/src/JIT/Intrinsics/BinaryPrimitivesReverseEndianness_ro.csproj @@ -0,0 +1,35 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + true + None + True + + + + + + + + + +