Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics...
author Tanner Gooding <tagoo@outlook.com>
Tue, 5 May 2020 00:01:36 +0000 (17:01 -0700)
committer GitHub <noreply@github.com>
Tue, 5 May 2020 00:01:36 +0000 (17:01 -0700)
* Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics

* Applying formatting patch

* Fixing a preprocessor concatenation for non windows

* Add a default case to work around a compiler warning on FreeBSD

* Fixing a noway_assert to include GT_HWINTRINSIC

* Fixing some asserts that were being triggered

* Use getSIMDVectorRegisterByteLength

* Applying formatting patch

* Fixing ARM64 to use the actual type size

* Removing the [Intrinsic] attribute from some Vector2/3/4 methods which aren't intrinsic

* Updating SSE/SSE2 CompareGreaterThan and related functions to be table driven

* Fixing the SimdAsHWIntrinsic relational operations to match the GT_SIMD behavior

* Ensure that GT_HWINTRINSIC fixes the type for certain TYP_SIMD8

* Fixing the SimdAsHWIntrinsic Vector<int>.op_Multiply support to match the GT_SIMD behavior

* Fixing the SimdAsHWIntrinsic Vector2/3 Division to match the GT_SIMD behavior

* Porting Abs, Min, and Max to use the SimdAsHWIntrinsic support

* Minor fixups to the SSE2 codepath

* Applying formatting patch

* Fixing a check in lowering

* Mark SimdAsHWIntrinsic nodes so we can lookup the correct handle

* Adding the 3 operand overload for gtNewSimdAsHWIntrinsicNode

* Fixing BuildHWIntrinsic to properly take RMW into account

* Fixing the rationalize handling of GT_HWINTRINSIC to account for SIMD vs non-SIMD nodes

* Fixing the importer to not create SIMD nodes if featureSIMD is disabled

* Fixing the SSE4.2 implementation of CompareLessThan<long>

* Preserve the base type for subtraction/addition operations

* Applying formatting patch

* Responding to PR feedback

* Fixing a copy/paste error under reinterpret cast

* Fixing abs to expect 1 argument

* Adding method comment headers that were missing

* Removing unused table entries from SimdAsHWIntrinsic for Vector2/3/4

* Ensure we catch intrinsics from the Vector static class

* Fixing SSSE3_Abs and AVX2_Abs to get the base type from the first argument

* Ensure we adjust the class handle used for intrinsics from the Vector static class

* Ensure we populate the handle cache for clsHnd even if it isn't used

* Fix where we grab the base type from for the static Vector class

* Fixing ConditionalSelect and improving the messages used for impCloneExpr in SimdAsHWIntrinsic

* Ensure we clone the constVectorDup before using it

* Applying formatting patch

26 files changed:
src/coreclr/src/jit/CMakeLists.txt
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/gentree.h
src/coreclr/src/jit/hwintrinsic.cpp
src/coreclr/src/jit/hwintrinsic.h
src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/hwintrinsicxarch.cpp
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/lower.cpp
src/coreclr/src/jit/lowerarmarch.cpp
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/src/jit/lsraxarch.cpp
src/coreclr/src/jit/namedintrinsiclist.h
src/coreclr/src/jit/rationalize.cpp
src/coreclr/src/jit/simd.cpp
src/coreclr/src/jit/simdashwintrinsic.cpp [new file with mode: 0644]
src/coreclr/src/jit/simdashwintrinsic.h [new file with mode: 0644]
src/coreclr/src/jit/simdashwintrinsiclistarm64.h [new file with mode: 0644]
src/coreclr/src/jit/simdashwintrinsiclistxarch.h [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs
src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs
src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs

index 5093ce9..8ba2c47 100644 (file)
@@ -170,6 +170,7 @@ if (CLR_CMAKE_TARGET_WIN32)
     regset.h
     sideeffects.h
     simd.h
+    simdashwintrinsic.h
     simdintrinsiclist.h
     sm.h
     smallhash.h
@@ -204,14 +205,16 @@ if (CLR_CMAKE_TARGET_WIN32)
       instrsarm.h
       instrsarm64.h
       registerarm.h
-      registerarm64.h)
+      registerarm64.h
+      simdashwintrinsiclistarm64.h)
   elseif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)
     list (APPEND JIT_HEADERS
       emitfmtsxarch.h
       emitxarch.h
       hwintrinsiclistxarch.h
       hwintrinsic.h
-      instrsxarch.h)
+      instrsxarch.h
+      simdashwintrinsiclistxarch.h)
   endif ()
 endif(CLR_CMAKE_TARGET_WIN32)
 
@@ -223,6 +226,7 @@ set( JIT_AMD64_SOURCES
   lowerxarch.cpp
   lsraxarch.cpp
   simd.cpp
+  simdashwintrinsic.cpp
   simdcodegenxarch.cpp
   targetamd64.cpp
   unwindamd64.cpp
@@ -249,6 +253,7 @@ set( JIT_I386_SOURCES
   lowerxarch.cpp
   lsraxarch.cpp
   simd.cpp
+  simdashwintrinsic.cpp
   simdcodegenxarch.cpp
   targetx86.cpp
   unwindx86.cpp
@@ -264,6 +269,7 @@ set( JIT_ARM64_SOURCES
   lsraarmarch.cpp
   lsraarm64.cpp
   simd.cpp
+  simdashwintrinsic.cpp
   targetarm64.cpp
   unwindarm.cpp
   unwindarm64.cpp
index a76847e..61939a9 100644 (file)
@@ -61,6 +61,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #include "hwintrinsic.h"
 #include "simd.h"
+#include "simdashwintrinsic.h"
 
 // This is only used locally in the JIT to indicate that
 // a verification block should be inserted
@@ -2615,6 +2616,36 @@ public:
                                                  NamedIntrinsic hwIntrinsicID,
                                                  var_types      baseType,
                                                  unsigned       size);
+
+    GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(
+        var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
+    {
+        GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, baseType, size);
+        node->gtFlags |= GTF_SIMDASHW_OP;
+        return node;
+    }
+
+    GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(
+        var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
+    {
+        GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, hwIntrinsicID, baseType, size);
+        node->gtFlags |= GTF_SIMDASHW_OP;
+        return node;
+    }
+
+    GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types      type,
+                                                   GenTree*       op1,
+                                                   GenTree*       op2,
+                                                   GenTree*       op3,
+                                                   NamedIntrinsic hwIntrinsicID,
+                                                   var_types      baseType,
+                                                   unsigned       size)
+    {
+        GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, baseType, size);
+        node->gtFlags |= GTF_SIMDASHW_OP;
+        return node;
+    }
+
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types      type,
                                                    GenTree*       op1,
@@ -3689,16 +3720,36 @@ protected:
                                        CORINFO_METHOD_HANDLE method,
                                        CORINFO_SIG_INFO*     sig,
                                        bool                  mustExpand);
+    GenTree* impSimdAsHWIntrinsic(NamedIntrinsic        intrinsic,
+                                  CORINFO_CLASS_HANDLE  clsHnd,
+                                  CORINFO_METHOD_HANDLE method,
+                                  CORINFO_SIG_INFO*     sig,
+                                  bool                  mustExpand);
 
 protected:
     bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa);
 
+    GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
+                                         CORINFO_CLASS_HANDLE clsHnd,
+                                         CORINFO_SIG_INFO*    sig,
+                                         var_types            retType,
+                                         var_types            baseType,
+                                         unsigned             simdSize);
+
+    GenTree* impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd,
+                                        var_types            retType,
+                                        var_types            baseType,
+                                        unsigned             simdSize,
+                                        GenTree*             op1,
+                                        GenTree*             op2,
+                                        GenTree*             op3);
+
     GenTree* impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                                  CORINFO_CLASS_HANDLE  clsHnd,
                                  CORINFO_METHOD_HANDLE method,
                                  CORINFO_SIG_INFO*     sig);
 
-    GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
+    GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false);
     GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
     GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand, int immUpperBound);
 
@@ -3712,6 +3763,13 @@ protected:
     GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
 
+    GenTree* impSimdAsHWIntrinsicRelOp(NamedIntrinsic       intrinsic,
+                                       CORINFO_CLASS_HANDLE clsHnd,
+                                       var_types            retType,
+                                       var_types            baseType,
+                                       unsigned             simdSize,
+                                       GenTree*             op1,
+                                       GenTree*             op2);
 #endif // TARGET_XARCH
 #endif // FEATURE_HW_INTRINSICS
     GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
@@ -8203,8 +8261,9 @@ private:
         return emitTypeSize(TYP_SIMD8);
     }
 
+public:
     // Returns the codegen type for a given SIMD size.
-    var_types getSIMDTypeForSize(unsigned size)
+    static var_types getSIMDTypeForSize(unsigned size)
     {
         var_types simdType = TYP_UNDEF;
         if (size == 8)
@@ -8230,6 +8289,7 @@ private:
         return simdType;
     }
 
+private:
     unsigned getSIMDInitTempVarNum()
     {
         if (lvaSIMDInitTempVarNum == BAD_VAR_NUM)
index 1a11b13..689fa7f 100644 (file)
@@ -22298,12 +22298,11 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call)
  */
 GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree)
 {
-    noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) ||
-                 (tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() ||
-                 // tree->gtOper == GT_CALL     || cannot get address of call.
-                 // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode.
-                 // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
-                 (tree->gtOper == GT_COMMA));
+    noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() ||
+                 tree->OperIsHWIntrinsic());
+    // GT_CALL,     cannot get address of call.
+    // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
+    // GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
 
     switch (tree->OperGet())
     {
index dfab57a..279402f 100644 (file)
@@ -17206,6 +17206,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
                 if (varTypeIsSIMD(tree))
                 {
                     structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+#ifdef FEATURE_HW_INTRINSICS
+                    if (structHnd == NO_CLASS_HANDLE)
+                    {
+                        structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT);
+                    }
+#endif
                 }
 #endif
                 break;
@@ -17272,7 +17278,14 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
 #endif // FEATURE_SIMD
 #ifdef FEATURE_HW_INTRINSICS
             case GT_HWINTRINSIC:
-                structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+                if ((tree->gtFlags & GTF_SIMDASHW_OP) != 0)
+                {
+                    structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+                }
+                else
+                {
+                    structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+                }
                 break;
 #endif
                 break;
index a3fe7aa..12645fb 100644 (file)
@@ -745,7 +745,7 @@ public:
 
 #define GTF_UNSIGNED    0x00008000 // With GT_CAST:   the source operand is an unsigned type
                                    // With operators: the specified node is an unsigned operator
-                                   // 
+                                   //
 #define GTF_LATE_ARG    0x00010000 // The specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs.
 #define GTF_SPILL       0x00020000 // Needs to be spilled here
 
@@ -915,6 +915,9 @@ public:
 #define GTF_SIMD12_OP               0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12
                                                //            even if they have been retyped as SIMD16.
 
+#define GTF_SIMDASHW_OP             0x80000000 // GT_HWINTRINSIC -- Indicates that the structHandle should be gotten from gtGetStructHandleForSIMD
+                                               //                   rather than from gtGetStructHandleForHWSIMD.
+
 //---------------------------------------------------------------------
 //
 // GenTree flags stored in gtDebugFlags.
index 0e62ffe..efcf5b8 100644 (file)
@@ -495,16 +495,17 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op)
 }
 
 //------------------------------------------------------------------------
-// // getArgForHWIntrinsic: pop an argument from the stack and validate its type
+// getArgForHWIntrinsic: pop an argument from the stack and validate its type
 //
 // Arguments:
-//    argType   -- the required type of argument
-//    argClass  -- the class handle of argType
+//    argType    -- the required type of argument
+//    argClass   -- the class handle of argType
+//    expectAddr -- if true, indicates we are expecting the stack entry to be a TYP_BYREF.
 //
 // Return Value:
 //     the validated argument
 //
-GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
+GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr)
 {
     GenTree* arg = nullptr;
     if (argType == TYP_STRUCT)
@@ -512,9 +513,9 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE
         unsigned int argSizeBytes;
         var_types    base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
         argType           = getSIMDTypeForSize(argSizeBytes);
-        assert((argType == TYP_SIMD8) || (argType == TYP_SIMD16) || (argType == TYP_SIMD32));
-        arg = impSIMDPopStack(argType);
-        assert((arg->TypeGet() == TYP_SIMD8) || (arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
+        assert(varTypeIsSIMD(argType));
+        arg = impSIMDPopStack(argType, expectAddr);
+        assert(varTypeIsSIMD(arg->TypeGet()));
     }
     else
     {
index 960b658..fc3686f 100644 (file)
@@ -304,7 +304,7 @@ struct HWIntrinsicInfo
     }
 
 #ifdef TARGET_XARCH
-    static int lookupIval(NamedIntrinsic id)
+    static int lookupIval(NamedIntrinsic id, bool opportunisticallyDependsOnAVX)
     {
         switch (id)
         {
@@ -321,6 +321,17 @@ struct HWIntrinsicInfo
             case NI_SSE_CompareScalarGreaterThan:
             case NI_SSE2_CompareGreaterThan:
             case NI_SSE2_CompareScalarGreaterThan:
+            case NI_AVX_CompareGreaterThan:
+            {
+                if (opportunisticallyDependsOnAVX)
+                {
+                    return static_cast<int>(FloatComparisonMode::OrderedGreaterThanSignaling);
+                }
+
+                assert(id != NI_AVX_CompareGreaterThan);
+                return static_cast<int>(FloatComparisonMode::OrderedLessThanSignaling);
+            }
+
             case NI_SSE_CompareLessThan:
             case NI_SSE_CompareScalarLessThan:
             case NI_SSE2_CompareLessThan:
@@ -334,6 +345,17 @@ struct HWIntrinsicInfo
             case NI_SSE_CompareScalarGreaterThanOrEqual:
             case NI_SSE2_CompareGreaterThanOrEqual:
             case NI_SSE2_CompareScalarGreaterThanOrEqual:
+            case NI_AVX_CompareGreaterThanOrEqual:
+            {
+                if (opportunisticallyDependsOnAVX)
+                {
+                    return static_cast<int>(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling);
+                }
+
+                assert(id != NI_AVX_CompareGreaterThanOrEqual);
+                return static_cast<int>(FloatComparisonMode::OrderedLessThanOrEqualSignaling);
+            }
+
             case NI_SSE_CompareLessThanOrEqual:
             case NI_SSE_CompareScalarLessThanOrEqual:
             case NI_SSE2_CompareLessThanOrEqual:
@@ -356,6 +378,17 @@ struct HWIntrinsicInfo
             case NI_SSE_CompareScalarNotGreaterThan:
             case NI_SSE2_CompareNotGreaterThan:
             case NI_SSE2_CompareScalarNotGreaterThan:
+            case NI_AVX_CompareNotGreaterThan:
+            {
+                if (opportunisticallyDependsOnAVX)
+                {
+                    return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanSignaling);
+                }
+
+                assert(id != NI_AVX_CompareNotGreaterThan);
+                return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanSignaling);
+            }
+
             case NI_SSE_CompareNotLessThan:
             case NI_SSE_CompareScalarNotLessThan:
             case NI_SSE2_CompareNotLessThan:
@@ -369,6 +402,17 @@ struct HWIntrinsicInfo
             case NI_SSE_CompareScalarNotGreaterThanOrEqual:
             case NI_SSE2_CompareNotGreaterThanOrEqual:
             case NI_SSE2_CompareScalarNotGreaterThanOrEqual:
+            case NI_AVX_CompareNotGreaterThanOrEqual:
+            {
+                if (opportunisticallyDependsOnAVX)
+                {
+                    return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling);
+                }
+
+                assert(id != NI_AVX_CompareNotGreaterThanOrEqual);
+                return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling);
+            }
+
             case NI_SSE_CompareNotLessThanOrEqual:
             case NI_SSE_CompareScalarNotLessThanOrEqual:
             case NI_SSE2_CompareNotLessThanOrEqual:
@@ -437,26 +481,6 @@ struct HWIntrinsicInfo
                 return static_cast<int>(FloatRoundingMode::ToZero);
             }
 
-            case NI_AVX_CompareGreaterThan:
-            {
-                return static_cast<int>(FloatComparisonMode::OrderedGreaterThanSignaling);
-            }
-
-            case NI_AVX_CompareGreaterThanOrEqual:
-            {
-                return static_cast<int>(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling);
-            }
-
-            case NI_AVX_CompareNotGreaterThan:
-            {
-                return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanSignaling);
-            }
-
-            case NI_AVX_CompareNotGreaterThanOrEqual:
-            {
-                return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling);
-            }
-
             default:
             {
                 return -1;
index 767d18f..3d224d4 100644 (file)
@@ -215,7 +215,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
     }
     else
     {
-        emitSize = EA_SIZE(node->gtSIMDSize);
+        emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
         opt      = genGetSimdInsOpt(emitSize, intrin.baseType);
 
         if ((opt == INS_OPTS_1D) && (intrin.category == HW_Category_SimpleSIMD))
index 04214e1..2b6336d 100644 (file)
@@ -82,9 +82,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
     NamedIntrinsic         intrinsicId = node->gtHWIntrinsicId;
     CORINFO_InstructionSet isa         = HWIntrinsicInfo::lookupIsa(intrinsicId);
     HWIntrinsicCategory    category    = HWIntrinsicInfo::lookupCategory(intrinsicId);
-    int                    ival        = HWIntrinsicInfo::lookupIval(intrinsicId);
     int                    numArgs     = HWIntrinsicInfo::lookupNumArgs(node);
 
+    int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX));
+
     assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId));
 
     if (genIsTableDrivenHWIntrinsic(intrinsicId, category))
@@ -102,7 +103,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
         assert(numArgs >= 0);
         instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
         assert(ins != INS_invalid);
-        emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
+        emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
         assert(simdSize != 0);
 
         switch (numArgs)
@@ -254,11 +255,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (node->TypeGet() == TYP_VOID)
                 {
-                    genHWIntrinsic_R_RM(node, ins, EA_ATTR(node->gtSIMDSize), op1Reg, op2);
+                    genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2);
                 }
                 else
                 {
-                    genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
+                    genHWIntrinsic_R_R_RM(node, ins, simdSize);
                 }
                 break;
             }
@@ -550,7 +551,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, i
     var_types targetType = node->TypeGet();
     regNumber targetReg  = node->GetRegNum();
     GenTree*  op1        = node->gtGetOp1();
-    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
+    emitAttr  simdSize   = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     emitter*  emit       = GetEmitter();
 
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
@@ -632,7 +633,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins,
     regNumber targetReg  = node->GetRegNum();
     GenTree*  op1        = node->gtGetOp1();
     GenTree*  op2        = node->gtGetOp2();
-    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
+    emitAttr  simdSize   = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     emitter*  emit       = GetEmitter();
 
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
@@ -796,7 +797,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins)
     GenTree*  op1        = node->gtGetOp1();
     GenTree*  op2        = node->gtGetOp2();
     GenTree*  op3        = nullptr;
-    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
+    emitAttr  simdSize   = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     emitter*  emit       = GetEmitter();
 
     assert(op1->OperIsList());
@@ -1150,7 +1151,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
     assert(node->gtGetOp2() == nullptr);
 
     emitter*    emit = GetEmitter();
-    emitAttr    attr = EA_ATTR(node->gtSIMDSize);
+    emitAttr    attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     instruction ins  = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
 
     switch (intrinsicId)
@@ -1408,25 +1409,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
 
     switch (intrinsicId)
     {
-        // All integer overloads are handled by table codegen
-        case NI_SSE2_CompareLessThan:
-        {
-            assert(op1 != nullptr);
-            assert(op2 != nullptr);
-
-            assert(baseType == TYP_DOUBLE);
-
-            int ival = HWIntrinsicInfo::lookupIval(intrinsicId);
-            assert((ival >= 0) && (ival <= 127));
-
-            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            op1Reg          = op1->GetRegNum();
-            op2Reg          = op2->GetRegNum();
-            emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
-
-            break;
-        }
-
         case NI_SSE2_X64_ConvertScalarToVector128Double:
         {
             assert(baseType == TYP_LONG);
@@ -1677,7 +1659,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
     var_types      baseType    = node->gtSIMDBaseType;
-    emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
+    emitAttr       attr        = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     var_types      targetType  = node->TypeGet();
     instruction    ins         = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
     int            numArgs     = HWIntrinsicInfo::lookupNumArgs(node);
@@ -1990,7 +1972,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
     var_types      baseType    = node->gtSIMDBaseType;
-    emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
+    emitAttr       attr        = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
     instruction    ins         = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
     GenTree*       op1         = node->gtGetOp1();
     regNumber      targetReg   = node->GetRegNum();
@@ -2016,16 +1998,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
     // Intrinsics with CopyUpperBits semantics cannot have op1 be contained
     assert(!copiesUpperBits || !op1->isContained());
 
-    if (op3->isContained() || op3->isUsedFromSpillTemp())
-    {
-        // 213 form: op1 = (op2 * op1) + [op3]
-
-        op1Reg = op1->GetRegNum();
-        op2Reg = op2->GetRegNum();
-
-        isCommutative = !copiesUpperBits;
-    }
-    else if (op2->isContained() || op2->isUsedFromSpillTemp())
+    if (op2->isContained() || op2->isUsedFromSpillTemp())
     {
         // 132 form: op1 = (op1 * op3) + [op2]
 
@@ -2045,7 +2018,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
     }
     else
     {
-        // 213 form: op1 = (op2 * op1) + op3
+        // 213 form: op1 = (op2 * op1) + [op3]
 
         op1Reg = op1->GetRegNum();
         op2Reg = op2->GetRegNum();
index 49240a4..4be0cda 100644 (file)
@@ -111,11 +111,11 @@ HARDWARE_INTRINSIC(SSE,             CompareEqual,
 HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE,             CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE,             CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -131,9 +131,9 @@ HARDWARE_INTRINSIC(SSE,             CompareNotEqual,
 HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE,             CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
@@ -212,15 +212,15 @@ HARDWARE_INTRINSIC(SSE2,            CompareEqual,
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            CompareGreaterThan,                         16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareGreaterThan,                         16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            CompareLessThan,                            16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_Special,                HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareLessThan,                            16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedLessThan,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarLessThan,                      16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedLessThan,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -232,9 +232,9 @@ HARDWARE_INTRINSIC(SSE2,            CompareNotEqual,
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
@@ -338,7 +338,7 @@ HARDWARE_INTRINSIC(SSE3,            MoveLowAndDuplicate,
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSSE3 Intrinsics
-HARDWARE_INTRINSIC(SSSE3,           Abs,                                        16,              1,     {INS_invalid,           INS_pabsb,              INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSSE3,           Abs,                                        16,              1,     {INS_pabsb,             INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(SSSE3,           AlignRight,                                 16,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSSE3,           HorizontalAdd,                              16,              2,     {INS_invalid,           INS_invalid,            INS_phaddw,             INS_invalid,            INS_phaddd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSSE3,           HorizontalAddSaturate,                      16,              2,     {INS_invalid,           INS_invalid,            INS_phaddsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -404,6 +404,7 @@ HARDWARE_INTRINSIC(SSE41_X64,       Insert,
 //  SSE42 Intrinsics
 HARDWARE_INTRINSIC(SSE42,           Crc32,                                       0,              2,     {INS_invalid,           INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
 HARDWARE_INTRINSIC(SSE42,           CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE42,           CompareLessThan,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
@@ -428,7 +429,6 @@ HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector128,
 HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector256,                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             BroadcastVector128ToVector256,              32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastf128,     INS_vbroadcastf128},    HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             Compare,                                    32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_IMM,                    HW_Flag_NoFlag)
-
 HARDWARE_INTRINSIC(AVX,             CompareEqual,                               32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX,             CompareGreaterThan,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             CompareGreaterThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -441,7 +441,6 @@ HARDWARE_INTRINSIC(AVX,             CompareNotLessThan,
 HARDWARE_INTRINSIC(AVX,             CompareNotLessThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             CompareOrdered,                             32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             CompareUnordered,                           32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-
 HARDWARE_INTRINSIC(AVX,             CompareScalar,                              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Single,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
@@ -497,7 +496,7 @@ HARDWARE_INTRINSIC(AVX,             Xor,
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AVX2 Intrinsics
-HARDWARE_INTRINSIC(AVX2,            Abs,                                        32,              1,     {INS_pabsb,             INS_pabsb,              INS_pabsw,              INS_pabsw,              INS_pabsd,              INS_pabsd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            Abs,                                        32,              1,     {INS_pabsb,             INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2,            Add,                                        32,              2,     {INS_paddb,             INS_paddb,              INS_paddw,              INS_paddw,              INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            AddSaturate,                                32,              2,     {INS_paddsb,            INS_paddusb,            INS_paddsw,             INS_paddusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            AlignRight,                                 32,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
@@ -511,13 +510,14 @@ HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector256,
 HARDWARE_INTRINSIC(AVX2,            BroadcastVector128ToVector256,              32,              1,     {INS_vbroadcasti128,    INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            CompareEqual,                               32,              2,     {INS_pcmpeqb,           INS_pcmpeqb,            INS_pcmpeqw,            INS_pcmpeqw,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqq,            INS_pcmpeqq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            CompareGreaterThan,                         32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            CompareLessThan,                            32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            ExtractVector128,                           32,              2,     {INS_vextracti128,      INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            ConvertToInt32,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2,            ConvertToUInt32,                            32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int16,                    32,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int32,                    32,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int64,                    32,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2,            GatherVector128,                            16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2,            GatherVector128,                            16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2,            GatherVector256,                            32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2,            GatherMaskVector128,                        16,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2,            GatherMaskVector256,                        32,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
index 3125f10..010cd5f 100644 (file)
@@ -1288,35 +1288,6 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
 
     switch (intrinsic)
     {
-        case NI_SSE_CompareGreaterThan:
-        case NI_SSE_CompareGreaterThanOrEqual:
-        case NI_SSE_CompareNotGreaterThan:
-        case NI_SSE_CompareNotGreaterThanOrEqual:
-        {
-            assert(sig->numArgs == 2);
-            op2      = impSIMDPopStack(TYP_SIMD16);
-            op1      = impSIMDPopStack(TYP_SIMD16);
-            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            assert(baseType == TYP_FLOAT);
-
-            if (compOpportunisticallyDependsOn(InstructionSet_AVX))
-            {
-                // These intrinsics are "special import" because the non-AVX path isn't directly
-                // hardware supported. Instead, they start with "swapped operands" and we fix that here.
-
-                FloatComparisonMode comparison =
-                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic));
-                comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison);
-                retNode    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
-                                                   NI_AVX_Compare, baseType, simdSize);
-            }
-            else
-            {
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize);
-            }
-            break;
-        }
-
         case NI_SSE_CompareScalarGreaterThan:
         case NI_SSE_CompareScalarGreaterThanOrEqual:
         case NI_SSE_CompareScalarNotGreaterThan:
@@ -1334,9 +1305,8 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
                 // hardware supported. Instead, they start with "swapped operands" and we fix that here.
 
                 FloatComparisonMode comparison =
-                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic));
-                comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison);
-                retNode    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
+                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic, true));
+                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
                                                    NI_AVX_CompareScalar, baseType, simdSize);
             }
             else
@@ -1395,68 +1365,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
 
     switch (intrinsic)
     {
-        case NI_SSE2_CompareGreaterThan:
-        {
-            if (baseType != TYP_DOUBLE)
-            {
-                assert(sig->numArgs == 2);
-                op2 = impSIMDPopStack(TYP_SIMD16);
-                op1 = impSIMDPopStack(TYP_SIMD16);
-
-                retNode =
-                    gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_CompareGreaterThan, baseType, simdSize);
-
-                break;
-            }
-
-            __fallthrough;
-        }
-
-        case NI_SSE2_CompareGreaterThanOrEqual:
-        case NI_SSE2_CompareNotGreaterThan:
-        case NI_SSE2_CompareNotGreaterThanOrEqual:
-        {
-            assert(sig->numArgs == 2);
-            op2 = impSIMDPopStack(TYP_SIMD16);
-            op1 = impSIMDPopStack(TYP_SIMD16);
-            assert(baseType == TYP_DOUBLE);
-
-            if (compOpportunisticallyDependsOn(InstructionSet_AVX))
-            {
-                // These intrinsics are "special import" because the non-AVX path isn't directly
-                // hardware supported. Instead, they start with "swapped operands" and we fix that here.
-
-                FloatComparisonMode comparison =
-                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic));
-                comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison);
-                retNode    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
-                                                   NI_AVX_Compare, baseType, simdSize);
-            }
-            else
-            {
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize);
-            }
-            break;
-        }
-
-        case NI_SSE2_CompareLessThan:
-        {
-            assert(sig->numArgs == 2);
-            op2 = impSIMDPopStack(TYP_SIMD16);
-            op1 = impSIMDPopStack(TYP_SIMD16);
-
-            if (baseType == TYP_DOUBLE)
-            {
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
-            }
-            else
-            {
-                retNode =
-                    gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize);
-            }
-            break;
-        }
-
         case NI_SSE2_CompareScalarGreaterThan:
         case NI_SSE2_CompareScalarGreaterThanOrEqual:
         case NI_SSE2_CompareScalarNotGreaterThan:
@@ -1473,9 +1381,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
                 // hardware supported. Instead, they start with "swapped operands" and we fix that here.
 
                 FloatComparisonMode comparison =
-                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic));
-                comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison);
-                retNode    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
+                    static_cast<FloatComparisonMode>(HWIntrinsicInfo::lookupIval(intrinsic, true));
+                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast<int>(comparison)),
                                                    NI_AVX_CompareScalar, baseType, simdSize);
             }
             else
index 99d5160..c5f01f3 100644 (file)
@@ -3497,6 +3497,11 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
                 return hwintrinsic;
             }
+
+            if ((ni > NI_SIMD_AS_HWINTRINSIC_START) && (ni < NI_SIMD_AS_HWINTRINSIC_END))
+            {
+                return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, mustExpand);
+            }
 #endif // FEATURE_HW_INTRINSICS
         }
     }
@@ -4152,7 +4157,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
             case NI_System_MathF_FusedMultiplyAdd:
             {
 #ifdef TARGET_XARCH
-                if (compExactlyDependsOn(InstructionSet_FMA))
+                if (compExactlyDependsOn(InstructionSet_FMA) && supportSIMDTypes())
                 {
                     assert(varTypeIsFloating(callType));
 
@@ -4467,6 +4472,15 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
         }
     }
 #ifdef FEATURE_HW_INTRINSICS
+    else if (strcmp(namespaceName, "System.Numerics") == 0)
+    {
+        CORINFO_SIG_INFO sig;
+        info.compCompHnd->getMethodSig(method, &sig);
+
+        int sizeOfVectorT = getSIMDVectorRegisterByteLength();
+
+        result = SimdAsHWIntrinsicInfo::lookupId(&sig, className, methodName, enclosingClassName, sizeOfVectorT);
+    }
     else if (strncmp(namespaceName, "System.Runtime.Intrinsics", 25) == 0)
     {
         namespaceName += 25;
index 48345ac..3a4a660 100644 (file)
@@ -1331,11 +1331,12 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg)
             LclVarDsc* varDsc = &comp->lvaTable[varNum];
             type              = varDsc->lvType;
         }
-        else if (arg->OperGet() == GT_SIMD)
+        else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC))
         {
-            assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12));
+            GenTreeJitIntrinsic* jitIntrinsic = reinterpret_cast<GenTreeJitIntrinsic*>(arg);
+            assert((jitIntrinsic->gtSIMDSize == 12) || (jitIntrinsic->gtSIMDSize == 16));
 
-            if (arg->AsSIMD()->gtSIMDSize == 12)
+            if (jitIntrinsic->gtSIMDSize == 12)
             {
                 type = TYP_SIMD12;
             }
@@ -5288,6 +5289,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node)
 
 #ifdef FEATURE_SIMD
         case GT_SIMD:
+        case GT_HWINTRINSIC:
             assert(node->TypeGet() != TYP_SIMD12);
             break;
 #ifdef TARGET_64BIT
index 4ae61be..75dfc14 100644 (file)
@@ -526,6 +526,15 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
 //
 void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 {
+    assert(node->TypeGet() != TYP_SIMD32);
+
+    if (node->TypeGet() == TYP_SIMD12)
+    {
+        // A GT_HWINTRINSIC node that is asked to produce TYP_SIMD12 actually
+        // produces a TYP_SIMD16 result, so retype it here.
+        node->gtType = TYP_SIMD16;
+    }
+
     ContainCheckHWIntrinsic(node);
 }
 #endif // FEATURE_HW_INTRINSICS
index 92e9965..ad0426b 100644 (file)
@@ -920,8 +920,61 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
 //
 void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 {
+    if (node->TypeGet() == TYP_SIMD12)
+    {
+        // A GT_HWINTRINSIC node that is asked to produce TYP_SIMD12 actually
+        // produces a TYP_SIMD16 result, so retype it here.
+        node->gtType = TYP_SIMD16;
+    }
+
     switch (node->gtHWIntrinsicId)
     {
+        case NI_SSE2_CompareGreaterThan:
+        {
+            if (node->gtSIMDBaseType != TYP_DOUBLE)
+            {
+                assert(varTypeIsIntegral(node->gtSIMDBaseType));
+                break;
+            }
+
+            __fallthrough;
+        }
+
+        case NI_SSE_CompareGreaterThan:
+        case NI_SSE_CompareGreaterThanOrEqual:
+        case NI_SSE_CompareNotGreaterThan:
+        case NI_SSE_CompareNotGreaterThanOrEqual:
+        case NI_SSE2_CompareGreaterThanOrEqual:
+        case NI_SSE2_CompareNotGreaterThan:
+        case NI_SSE2_CompareNotGreaterThanOrEqual:
+        {
+            assert((node->gtSIMDBaseType == TYP_FLOAT) || (node->gtSIMDBaseType == TYP_DOUBLE));
+
+            if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX))
+            {
+                break;
+            }
+
+            // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around
+            std::swap(node->gtOp1, node->gtOp2);
+            break;
+        }
+
+        case NI_SSE2_CompareLessThan:
+        case NI_SSE42_CompareLessThan:
+        case NI_AVX2_CompareLessThan:
+        {
+            if (node->gtSIMDBaseType == TYP_DOUBLE)
+            {
+                break;
+            }
+            assert(varTypeIsIntegral(node->gtSIMDBaseType));
+
+            // this isn't actually supported in hardware so we need to swap the operands around
+            std::swap(node->gtOp1, node->gtOp2);
+            break;
+        }
+
         case NI_SSE_CompareScalarOrderedEqual:
             LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ);
             break;
@@ -2655,7 +2708,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
             switch (containingIntrinsicId)
             {
                 case NI_SSE_Shuffle:
-                case NI_SSE2_CompareLessThan:
                 case NI_SSE2_ShiftLeftLogical:
                 case NI_SSE2_ShiftRightArithmetic:
                 case NI_SSE2_ShiftRightLogical:
@@ -2975,6 +3027,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
     HWIntrinsicCategory category    = HWIntrinsicInfo::lookupCategory(intrinsicId);
     int                 numArgs     = HWIntrinsicInfo::lookupNumArgs(node);
     var_types           baseType    = node->gtSIMDBaseType;
+    unsigned            simdSize    = node->gtSIMDSize;
 
     GenTree* op1 = node->gtGetOp1();
     GenTree* op2 = node->gtGetOp2();
@@ -2993,6 +3046,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         return;
     }
 
+    if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM)
+    {
+        GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node);
+        assert(lastOp != nullptr);
+
+        if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI())
+        {
+            MakeSrcContained(node, lastOp);
+        }
+    }
+
+    if ((node->gtSIMDSize == 8) || (node->gtSIMDSize == 12))
+    {
+        // TODO-XArch-CQ: Ideally we would key this off of the size containingNode
+        // expects vs the size node actually is or would be if spilled to the stack
+        return;
+    }
+
     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
 
     const bool isCommutative = HWIntrinsicInfo::IsCommutative(intrinsicId);
@@ -3270,28 +3341,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     break;
                 }
 
-                case HW_Category_Special:
-                {
-                    if (intrinsicId == NI_SSE2_CompareLessThan)
-                    {
-                        bool supportsRegOptional = false;
-
-                        if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
-                        {
-                            MakeSrcContained(node, op2);
-                        }
-                        else if (supportsRegOptional)
-                        {
-                            op2->SetRegOptional();
-                        }
-                    }
-                    else
-                    {
-                        unreached();
-                    }
-                    break;
-                }
-
                 default:
                 {
                     unreached();
@@ -3479,17 +3528,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         {
             unreached();
         }
-
-        if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM)
-        {
-            GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node);
-            assert(lastOp != nullptr);
-
-            if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI())
-            {
-                MakeSrcContained(node, lastOp);
-            }
-        }
     }
 }
 #endif // FEATURE_HW_INTRINSICS
index 524de87..f784b55 100644 (file)
@@ -2458,8 +2458,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                     assert(isRMW);
 
                     // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
-                    srcCount += BuildOperandUses(op1);
-                    srcCount += BuildDelayFreeUses(op2);
+                    tgtPrefUse = BuildUse(op1);
+
+                    srcCount += 1;
+                    srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2);
                     srcCount += BuildDelayFreeUses(op3, RBM_XMM0);
 
                     buildUses = false;
@@ -2493,7 +2495,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 assert(isRMW);
 
                 // CRC32 may operate over "byte" but on x86 only RBM_BYTE_REGS can be used as byte registers.
-                srcCount += BuildOperandUses(op1);
+                tgtPrefUse = BuildUse(op1);
+
+                srcCount += 1;
                 srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE);
 
                 buildUses = false;
@@ -2539,29 +2543,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 // Intrinsics with CopyUpperBits semantics cannot have op1 be contained
                 assert(!copiesUpperBits || !op1->isContained());
 
-                if (op3->isContained())
-                {
-                    // 213 form: op1 = (op2 * op1) + [op3]
-
-                    if (copiesUpperBits)
-                    {
-                        tgtPrefUse = BuildUse(op1);
-
-                        srcCount += 1;
-                        srcCount += BuildDelayFreeUses(op2);
-                    }
-                    else
-                    {
-                        // op1 and op2 are commutative, so don't
-                        // set either to be tgtPref or delayFree
-
-                        srcCount += BuildOperandUses(op1);
-                        srcCount += BuildOperandUses(op2);
-                    }
-
-                    srcCount += BuildOperandUses(op3);
-                }
-                else if (op2->isContained())
+                if (op2->isContained())
                 {
                     // 132 form: op1 = (op1 * op3) + [op2]
 
@@ -2583,25 +2565,22 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 }
                 else
                 {
-                    // 213 form: op1 = (op2 * op1) + op3
+                    // 213 form: op1 = (op2 * op1) + op3, where op3 may be contained ([op3])
+
+                    tgtPrefUse = BuildUse(op1);
+                    srcCount += 1;
 
                     if (copiesUpperBits)
                     {
-                        tgtPrefUse = BuildUse(op1);
-
-                        srcCount += 1;
                         srcCount += BuildDelayFreeUses(op2);
                     }
                     else
                     {
-                        // op1 and op2 are commutative, so don't
-                        // set either to be tgtPref or delayFree
-
-                        srcCount += BuildOperandUses(op1);
-                        srcCount += BuildOperandUses(op2);
+                        tgtPrefUse2 = BuildUse(op2);
+                        srcCount += 1;
                     }
 
-                    srcCount += BuildDelayFreeUses(op3);
+                    srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3);
                 }
 
                 buildUses = false;
@@ -2612,10 +2591,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
             case NI_AVX2_GatherVector256:
             {
                 assert(numArgs == 3);
+                assert(!isRMW);
+
                 // Any pair of the index, mask, or destination registers should be different
                 srcCount += BuildOperandUses(op1);
                 srcCount += BuildDelayFreeUses(op2);
 
+                // op3 should always be contained
+                assert(op3->isContained());
+
                 // get a tmp register for mask that will be cleared by gather instructions
                 buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
                 setInternalRegsDelayFree = true;
@@ -2628,16 +2612,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
             case NI_AVX2_GatherMaskVector256:
             {
                 assert(numArgs == 5);
+                assert(!isRMW);
+                assert(intrinsicTree->gtGetOp1()->OperIsList());
+
+                GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest();
+                GenTree*        op4     = argList->Current();
+
                 // Any pair of the index, mask, or destination registers should be different
                 srcCount += BuildOperandUses(op1);
-                srcCount += BuildOperandUses(op2);
+                srcCount += BuildDelayFreeUses(op2);
                 srcCount += BuildDelayFreeUses(op3);
-
-                assert(intrinsicTree->gtGetOp1()->OperIsList());
-                GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList();
-                GenTree*        op4     = argList->Rest()->Rest()->Rest()->Current();
                 srcCount += BuildDelayFreeUses(op4);
 
+                // op5 should always be contained
+                assert(argList->Rest()->Current()->isContained());
+
                 // get a tmp register for mask that will be cleared by gather instructions
                 buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
                 setInternalRegsDelayFree = true;
@@ -2661,6 +2650,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
             {
                 srcCount += BuildAddrUses(op1);
             }
+            else if (isRMW && !op1->isContained())
+            {
+                tgtPrefUse = BuildUse(op1);
+                srcCount += 1;
+            }
             else
             {
                 srcCount += BuildOperandUses(op1);
@@ -2672,9 +2666,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 {
                     srcCount += BuildAddrUses(op2->gtGetOp1());
                 }
-                else if (isRMW)
+                else if (isRMW && !op2->isContained())
                 {
-                    srcCount += BuildDelayFreeUses(op2);
+                    if (HWIntrinsicInfo::IsCommutative(intrinsicId))
+                    {
+                        tgtPrefUse2 = BuildUse(op2);
+                        srcCount += 1;
+                    }
+                    else
+                    {
+                        srcCount += BuildDelayFreeUses(op2);
+                    }
                 }
                 else
                 {
@@ -2683,7 +2685,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 
                 if (op3 != nullptr)
                 {
-                    srcCount += (isRMW) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
+                    srcCount += isRMW ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
                 }
             }
         }
index f4969a5..d105eab 100644 (file)
@@ -38,6 +38,16 @@ enum NamedIntrinsic : unsigned short
 #include "hwintrinsiclistarm64.h"
 #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64)
     NI_HW_INTRINSIC_END,
+
+    NI_SIMD_AS_HWINTRINSIC_START,
+#if defined(TARGET_XARCH)
+#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name,
+#include "simdashwintrinsiclistxarch.h"
+#elif defined(TARGET_ARM64)
+#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name,
+#include "simdashwintrinsiclistarm64.h"
+#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64)
+    NI_SIMD_AS_HWINTRINSIC_END,
 #endif // FEATURE_HW_INTRINSICS
 
 };
index 9f45bed..07bdd29 100644 (file)
@@ -768,6 +768,32 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge
         break;
 #endif // FEATURE_SIMD
 
+#ifdef FEATURE_HW_INTRINSICS
+        case GT_HWINTRINSIC:
+        {
+            GenTreeHWIntrinsic* hwIntrinsicNode = node->AsHWIntrinsic();
+
+            if (!hwIntrinsicNode->isSIMD())
+            {
+                break;
+            }
+
+            noway_assert(comp->supportSIMDTypes());
+
+            // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted
+            // structs that are passed or returned in a different register type than their enregistered
+            // type(s).
+            if ((hwIntrinsicNode->gtType == TYP_I_IMPL) && (hwIntrinsicNode->gtSIMDSize == TARGET_POINTER_SIZE))
+            {
+                // This happens when it is consumed by a GT_RET_EXPR.
+                // It can only be a Vector2f or Vector2i.
+                assert(genTypeSize(hwIntrinsicNode->gtSIMDBaseType) == 4);
+                hwIntrinsicNode->gtType = TYP_SIMD8;
+            }
+            break;
+        }
+#endif // FEATURE_HW_INTRINSICS
+
         default:
             // These nodes should not be present in HIR.
             assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD));
index 9077971..bfd8c04 100644 (file)
@@ -162,11 +162,13 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
         if (typeHnd == m_simdHandleCache->SIMDFloatHandle)
         {
             simdBaseType = TYP_FLOAT;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<Float>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDIntHandle)
         {
             simdBaseType = TYP_INT;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<Int>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDVector2Handle)
@@ -192,46 +194,55 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
         }
         else if (typeHnd == m_simdHandleCache->SIMDVectorHandle)
         {
+            size = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type Vector\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDUShortHandle)
         {
             simdBaseType = TYP_USHORT;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<ushort>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDUByteHandle)
         {
             simdBaseType = TYP_UBYTE;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<ubyte>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDDoubleHandle)
         {
             simdBaseType = TYP_DOUBLE;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<Double>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDLongHandle)
         {
             simdBaseType = TYP_LONG;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<Long>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDShortHandle)
         {
             simdBaseType = TYP_SHORT;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<short>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDByteHandle)
         {
             simdBaseType = TYP_BYTE;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<byte>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDUIntHandle)
         {
             simdBaseType = TYP_UINT;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<uint>\n");
         }
         else if (typeHnd == m_simdHandleCache->SIMDULongHandle)
         {
             simdBaseType = TYP_ULONG;
+            size         = getSIMDVectorRegisterByteLength();
             JITDUMP("  Known type SIMD Vector<ulong>\n");
         }
 
@@ -253,6 +264,8 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
             {
                 if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
                 {
+                    size = getSIMDVectorRegisterByteLength();
+
                     if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
                     {
                         m_simdHandleCache->SIMDFloatHandle = typeHnd;
@@ -348,6 +361,7 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
                 else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
                 {
                     m_simdHandleCache->SIMDVectorHandle = typeHnd;
+                    size                                = getSIMDVectorRegisterByteLength();
                     JITDUMP(" Found type Vector\n");
                 }
                 else
@@ -356,18 +370,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
                 }
             }
         }
-        if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
-        {
-            // If not a fixed size vector then its size is same as SIMD vector
-            // register length in bytes
-            if (size == 0)
-            {
-                size = getSIMDVectorRegisterByteLength();
-            }
-
-            *sizeBytes = size;
-            setUsesSIMDTypes(true);
-        }
     }
 #ifdef FEATURE_HW_INTRINSICS
     else if (isIntrinsicType(typeHnd))
@@ -776,18 +778,18 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
             simdBaseType = TYP_UNKNOWN;
         }
 #endif // TARGET_XARCH
+    }
+#endif // FEATURE_HW_INTRINSICS
 
-        if (sizeBytes != nullptr)
-        {
-            *sizeBytes = size;
-        }
+    if (sizeBytes != nullptr)
+    {
+        *sizeBytes = size;
+    }
 
-        if (simdBaseType != TYP_UNKNOWN)
-        {
-            setUsesSIMDTypes(true);
-        }
+    if (simdBaseType != TYP_UNKNOWN)
+    {
+        setUsesSIMDTypes(true);
     }
-#endif // FEATURE_HW_INTRINSICS
 
     return simdBaseType;
 }
diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp
new file mode 100644 (file)
index 0000000..1463ed3
--- /dev/null
@@ -0,0 +1,1110 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "simdashwintrinsic.h"
+
+#ifdef FEATURE_HW_INTRINSICS
+
+static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = {
+// clang-format off
+#if defined(TARGET_XARCH)
+#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag)                      \
+    {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)},
+#include "simdashwintrinsiclistxarch.h"
+#elif defined(TARGET_ARM64)
+#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag)                      \
+    {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<SimdAsHWIntrinsicFlag>(flag)},
+#include "simdashwintrinsiclistarm64.h"
+#else
+#error Unsupported platform
+#endif
+    // clang-format on
+}; // lookup() indexes this table with (id - NI_SIMD_AS_HWINTRINSIC_START - 1)
+
+//------------------------------------------------------------------------
+// lookup: Gets the SimdAsHWIntrinsicInfo associated with a given NamedIntrinsic
+//
+// Arguments:
+//    id -- The NamedIntrinsic to lookup; asserted to lie strictly between NI_SIMD_AS_HWINTRINSIC_START/END
+//
+// Return Value:
+//    The simdAsHWIntrinsicInfoArray entry at index (id - NI_SIMD_AS_HWINTRINSIC_START - 1)
+const SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id)
+{
+    assert(id != NI_Illegal);
+
+    assert(id > NI_SIMD_AS_HWINTRINSIC_START);
+    assert(id < NI_SIMD_AS_HWINTRINSIC_END);
+
+    return simdAsHWIntrinsicInfoArray[id - NI_SIMD_AS_HWINTRINSIC_START - 1];
+}
+
+//------------------------------------------------------------------------
+// lookupId: Gets the NamedIntrinsic for a given method signature, class name, and method name
+//
+// Arguments:
+//    sig, className     -- The signature of the method and the name of the class it is declared on
+//    methodName         -- The name of the method associated with the SimdIntrinsic to lookup
+//    enclosingClassName -- The name of the enclosing class
+//    sizeOfVectorT      -- The size of Vector<T> in bytes
+//
+// Return Value:
+//    The NamedIntrinsic whose class, argument count, instance-ness, and name all match; otherwise NI_Illegal
+NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig,
+                                               const char*       className,
+                                               const char*       methodName,
+                                               const char*       enclosingClassName,
+                                               int               sizeOfVectorT)
+{
+    SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT);
+
+    if (classId == SimdAsHWIntrinsicClassId::Unknown)
+    {
+        return NI_Illegal;
+    }
+
+    for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++)
+    {
+        const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i];
+
+        if (classId != intrinsicInfo.classId)
+        {
+            continue;
+        }
+
+        if (sig->numArgs != static_cast<unsigned>(intrinsicInfo.numArgs))
+        {
+            continue;
+        }
+
+        if (sig->hasThis() != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id))
+        {
+            continue;
+        }
+
+        if (strcmp(methodName, intrinsicInfo.name) != 0)
+        {
+            continue;
+        }
+
+        return intrinsicInfo.id;
+    }
+
+    return NI_Illegal;
+}
+
+//------------------------------------------------------------------------
+// lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclosing class name
+//
+// Arguments:
+//    className          -- The name of the class associated with the SimdAsHWIntrinsicClassId to lookup
+//    enclosingClassName -- The name of the enclosing class
+//    sizeOfVectorT      -- The size of Vector<T> in bytes
+//
+// Return Value:
+//    The SimdAsHWIntrinsicClassId associated with className and enclosingClassName, or Unknown if none matches
+SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className,
+                                                              const char* enclosingClassName,
+                                                              int         sizeOfVectorT)
+{
+    assert(className != nullptr);
+
+    if ((enclosingClassName != nullptr) || (className[0] != 'V')) // fast reject: nested or not "V..."
+    {
+        return SimdAsHWIntrinsicClassId::Unknown;
+    }
+    if (strcmp(className, "Vector2") == 0)
+    {
+        return SimdAsHWIntrinsicClassId::Vector2;
+    }
+    if (strcmp(className, "Vector3") == 0)
+    {
+        return SimdAsHWIntrinsicClassId::Vector3;
+    }
+    if (strcmp(className, "Vector4") == 0)
+    {
+        return SimdAsHWIntrinsicClassId::Vector4;
+    }
+    if ((strcmp(className, "Vector") == 0) || (strcmp(className, "Vector`1") == 0))
+    {
+#if defined(TARGET_XARCH)
+        if (sizeOfVectorT == 32)
+        {
+            return SimdAsHWIntrinsicClassId::VectorT256;
+        }
+#endif // TARGET_XARCH
+
+        assert(sizeOfVectorT == 16);
+        return SimdAsHWIntrinsicClassId::VectorT128;
+    }
+
+    return SimdAsHWIntrinsicClassId::Unknown;
+}
+
+//------------------------------------------------------------------------
+// impSimdAsHWIntrinsic: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible
+//
+// Arguments:
+//    intrinsic  -- id of the intrinsic function.
+//    clsHnd     -- class handle containing the intrinsic function.
+//    method     -- method handle of the intrinsic function.
+//    sig        -- signature of the intrinsic call
+//    mustExpand -- true if the intrinsic must return a GenTree*; asserted to be false on entry
+//
+// Return Value:
+//    The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic
+//
+GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic        intrinsic,
+                                        CORINFO_CLASS_HANDLE  clsHnd,
+                                        CORINFO_METHOD_HANDLE method,
+                                        CORINFO_SIG_INFO*     sig,
+                                        bool                  mustExpand)
+{
+    assert(!mustExpand);
+
+    if (!featureSIMD)
+    {
+        // We can't support SIMD intrinsics if the JIT doesn't support the feature
+        return nullptr;
+    }
+
+    var_types retType  = JITtype2varType(sig->retType);
+    var_types baseType = TYP_UNKNOWN;
+    var_types simdType = TYP_UNKNOWN;
+    unsigned  simdSize = 0;
+
+    // We want to resolve and populate the handle cache for this type even
+    // if it isn't the basis for anything carried on the node.
+    baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
+    assert(simdSize != 0);
+
+    CORINFO_CLASS_HANDLE argClass;
+
+    if (retType == TYP_STRUCT)
+    {
+        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize);
+        retType  = getSIMDTypeForSize(simdSize);
+    }
+    else
+    {
+        argClass = info.compCompHnd->getArgClass(sig, sig->args);
+        baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize);
+    }
+
+    if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0))
+    {
+        // We need to fixup the clsHnd in the case we are an intrinsic on Vector
+        // The first argument will be the appropriate Vector<T> handle to use
+        clsHnd = info.compCompHnd->getArgClass(sig, sig->args);
+
+        // We also need to adjust the baseType as some methods on Vector return
+        // a type different than the operation we need to perform. An example
+        // is LessThan or Equals which takes double but returns long. This is
+        // unlike the counterparts on Vector<T> which take and return the same type.
+        baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
+    }
+
+    simdType = getSIMDTypeForSize(simdSize);
+    assert(varTypeIsSIMD(simdType));
+
+    if (!varTypeIsArithmetic(baseType))
+    {
+        // We only support intrinsics on the 10 primitive arithmetic types
+        return nullptr;
+    }
+
+    NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+
+    if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType))
+    {
+        // The baseType isn't supported by the intrinsic
+        return nullptr;
+    }
+
+    if (SimdAsHWIntrinsicInfo::IsFloatingPointUsed(intrinsic))
+    {
+        // Set `compFloatingPointUsed` to cover the scenario where an intrinsic
+        // is operating on SIMD fields, but where no SIMD local vars are in use.
+        compFloatingPointUsed = true;
+    }
+
+    if (hwIntrinsic == intrinsic)
+    {
+        // The SIMD intrinsic requires special handling outside the normal code path
+        return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, sig, retType, baseType, simdSize);
+    }
+
+    CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic);
+
+    if (!compOpportunisticallyDependsOn(hwIntrinsicIsa))
+    {
+        // The JIT doesn't support the required ISA
+        return nullptr;
+    }
+
+    CORINFO_ARG_LIST_HANDLE argList = sig->args;
+    var_types               argType = TYP_UNKNOWN;
+
+    GenTree* op1 = nullptr;
+    GenTree* op2 = nullptr;
+
+    bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic);
+
+    switch (sig->numArgs)
+    {
+        case 1:
+        {
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+
+            assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize);
+        }
+
+        case 2:
+        {
+            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
+            argType                      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2                          = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+
+            if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic))
+            {
+                std::swap(op1, op2);
+            }
+
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
+    }
+
+    assert(!"Unexpected SimdAsHWIntrinsic");
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
+// impSimdAsHWIntrinsicSpecial: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible
+//                              This method handles cases which cannot be table driven
+//
+// Arguments:
+//    intrinsic  -- id of the intrinsic function.
+//    clsHnd     -- class handle containing the intrinsic function.
+//    sig        -- signature of the intrinsic call
+//    retType    -- the return type of the intrinsic call
+//    baseType   -- the base type of SIMD type of the intrinsic
+//    simdSize   -- the size of the SIMD type of the intrinsic
+//
+// Return Value:
+//    The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic
+//
+GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
+                                               CORINFO_CLASS_HANDLE clsHnd,
+                                               CORINFO_SIG_INFO*    sig,
+                                               var_types            retType,
+                                               var_types            baseType,
+                                               unsigned             simdSize)
+{
+    assert(featureSIMD);
+    assert(retType != TYP_UNKNOWN);
+    assert(varTypeIsArithmetic(baseType));
+    assert(simdSize != 0);
+    assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize)));
+    assert(SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType) == intrinsic);
+
+    CORINFO_ARG_LIST_HANDLE argList = sig->args;
+    var_types               argType = TYP_UNKNOWN;
+    CORINFO_CLASS_HANDLE    argClass;
+
+    GenTree* op1 = nullptr;
+    GenTree* op2 = nullptr;
+
+    SimdAsHWIntrinsicClassId classId          = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic);
+    bool                     isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic);
+
+#if defined(TARGET_XARCH)
+    bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256);
+
+    if ((baseType != TYP_FLOAT) && !compOpportunisticallyDependsOn(InstructionSet_SSE2))
+    {
+        // Vector<T>, for everything but float, requires at least SSE2
+        return nullptr;
+    }
+    else if (!compOpportunisticallyDependsOn(InstructionSet_SSE))
+    {
+        // Vector<float> requires at least SSE
+        return nullptr;
+    }
+
+    // Vector<T>, when 32-bytes, requires at least AVX2
+    assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+#endif
+
+    switch (sig->numArgs)
+    {
+        case 1:
+        {
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+
+            assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
+
+            switch (intrinsic)
+            {
+#if defined(TARGET_XARCH)
+                case NI_Vector2_Abs:
+                case NI_Vector3_Abs:
+                case NI_Vector4_Abs:
+                case NI_VectorT128_Abs:
+                case NI_VectorT256_Abs:
+                {
+                    if (varTypeIsFloating(baseType))
+                    {
+                        // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
+                        // Abs(vd) = vd & new SIMDVector<double>(0x7fffffffffffffff);
+                        GenTree* bitMask = nullptr;
+
+                        if (baseType == TYP_FLOAT)
+                        {
+                            static_assert_no_msg(sizeof(float) == sizeof(int));
+                            int mask = 0x7fffffff;
+                            bitMask  = gtNewDconNode(*((float*)&mask), TYP_FLOAT);
+                        }
+                        else
+                        {
+                            assert(baseType == TYP_DOUBLE);
+                            static_assert_no_msg(sizeof(double) == sizeof(__int64));
+
+                            __int64 mask = 0x7fffffffffffffffLL;
+                            bitMask      = gtNewDconNode(*((double*)&mask), TYP_DOUBLE);
+                        }
+                        assert(bitMask != nullptr);
+
+                        bitMask = gtNewSIMDNode(retType, bitMask, SIMDIntrinsicInit, baseType, simdSize);
+
+                        intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd;
+                        intrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+
+                        return gtNewSimdAsHWIntrinsicNode(retType, op1, bitMask, intrinsic, baseType, simdSize);
+                    }
+                    else if (varTypeIsUnsigned(baseType))
+                    {
+                        return op1;
+                    }
+                    else if ((baseType != TYP_LONG) && compOpportunisticallyDependsOn(InstructionSet_SSSE3))
+                    {
+                        return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_SSSE3_Abs, baseType, simdSize);
+                    }
+                    else
+                    {
+                        GenTree*       tmp;
+                        NamedIntrinsic hwIntrinsic;
+
+                        GenTree* op1Dup1;
+                        op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone op1 for Vector<T>.Abs"));
+
+                        GenTree* op1Dup2;
+                        op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                               nullptr DEBUGARG("Clone op1 for Vector<T>.Abs"));
+
+                        // op1 = op1 < Zero
+                        tmp         = gtNewSIMDVectorZero(retType, baseType, simdSize);
+                        hwIntrinsic = isVectorT256 ? NI_VectorT256_LessThan : NI_VectorT128_LessThan;
+                        op1 = impSimdAsHWIntrinsicRelOp(hwIntrinsic, clsHnd, retType, baseType, simdSize, op1, tmp);
+
+                        // tmp = Zero - op1Dup1
+                        tmp         = gtNewSIMDVectorZero(retType, baseType, simdSize);
+                        hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract;
+                        tmp = gtNewSimdAsHWIntrinsicNode(retType, tmp, op1Dup1, hwIntrinsic, baseType, simdSize);
+
+                        // result = ConditionalSelect(op1, tmp, op1Dup2)
+                        return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, tmp, op1Dup2);
+                    }
+                    break;
+                }
+#elif defined(TARGET_ARM64)
+                case NI_VectorT128_Abs:
+                {
+                    assert(varTypeIsUnsigned(baseType));
+                    return op1;
+                }
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+
+                default:
+                {
+                    // Some platforms warn about unhandled switch cases
+                    // We handle it more generally via the assert and nullptr return below.
+                    break;
+                }
+            }
+            break;
+        }
+
+        case 2:
+        {
+            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
+            argType                      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2                          = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+
+            assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic));
+
+            switch (intrinsic)
+            {
+#if defined(TARGET_XARCH)
+                case NI_Vector2_op_Division:
+                case NI_Vector3_op_Division:
+                {
+                    // Vector2/3 div: since the top-most elements will be zero, we end up
+                    // performing 0/0 which is a NAN. Therefore, post division we need to set the
+                    // top-most elements to zero. This is achieved by left logical shift followed
+                    // by right logical shift of the result.
+
+                    // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length.
+                    unsigned shiftCount = 16 - simdSize;
+                    assert((shiftCount > 0) && (shiftCount <= 16));
+
+                    // retNode = Sse.Divide(op1, op2);
+                    GenTree* retNode = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, NI_SSE_Divide, baseType, simdSize);
+
+                    // retNode = Sse2.ShiftLeftLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle()
+                    retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT),
+                                                         NI_SSE2_ShiftLeftLogical128BitLane, TYP_INT, simdSize);
+
+                    // retNode = Sse2.ShiftRightLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle()
+                    retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT),
+                                                         NI_SSE2_ShiftRightLogical128BitLane, TYP_INT, simdSize);
+
+                    return retNode;
+                }
+
+                case NI_VectorT128_Equals:
+                case NI_VectorT128_GreaterThan:
+                case NI_VectorT128_GreaterThanOrEqual:
+                case NI_VectorT128_LessThan:
+                case NI_VectorT128_LessThanOrEqual:
+                case NI_VectorT256_GreaterThan:
+                case NI_VectorT256_GreaterThanOrEqual:
+                case NI_VectorT256_LessThan:
+                case NI_VectorT256_LessThanOrEqual:
+                {
+                    return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2);
+                }
+
+                case NI_VectorT128_Max:
+                case NI_VectorT128_Min:
+                case NI_VectorT256_Max:
+                case NI_VectorT256_Min:
+                {
+                    if ((baseType == TYP_BYTE) || (baseType == TYP_USHORT))
+                    {
+                        GenTree*  constVal = nullptr;
+                        var_types opType   = baseType;
+
+                        NamedIntrinsic opIntrinsic;
+                        NamedIntrinsic hwIntrinsic;
+
+                        switch (baseType)
+                        {
+                            case TYP_BYTE:
+                            {
+                                constVal    = gtNewIconNode(0x80808080, TYP_INT);
+                                opIntrinsic = NI_VectorT128_op_Subtraction;
+                                baseType    = TYP_UBYTE;
+                                break;
+                            }
+
+                            case TYP_USHORT:
+                            {
+                                constVal    = gtNewIconNode(0x80008000, TYP_INT);
+                                opIntrinsic = NI_VectorT128_op_Addition;
+                                baseType    = TYP_SHORT;
+                                break;
+                            }
+
+                            default:
+                            {
+                                unreached();
+                            }
+                        }
+
+                        GenTree* constVector =
+                            gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, TYP_INT, simdSize);
+
+                        GenTree* constVectorDup1;
+                        constVector = impCloneExpr(constVector, &constVectorDup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                                   nullptr DEBUGARG("Clone constVector for Vector<T>.Max/Min"));
+
+                        GenTree* constVectorDup2;
+                        constVectorDup1 =
+                            impCloneExpr(constVectorDup1, &constVectorDup2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                         nullptr DEBUGARG("Clone constVector for Vector<T>.Max/Min"));
+
+                        hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType);
+
+                        // op1 = op1 - constVector
+                        // -or-
+                        // op1 = op1 + constVector
+                        op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize);
+
+                        // op2 = op2 - constVectorDup1
+                        // -or-
+                        // op2 = op2 + constVectorDup1
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup1, hwIntrinsic, opType, simdSize);
+
+                        // op1 = Max(op1, op2)
+                        // -or-
+                        // op1 = Min(op1, op2)
+                        hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+                        op1         = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+
+                        // result = op1 + constVectorDup2
+                        // -or-
+                        // result = op1 - constVectorDup2
+                        opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? NI_VectorT128_op_Addition
+                                                                                    : NI_VectorT128_op_Subtraction;
+                        hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType);
+                        return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup2, hwIntrinsic, opType, simdSize);
+                    }
+
+                    GenTree* op1Dup;
+                    op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op1 for Vector<T>.Max/Min"));
+
+                    GenTree* op2Dup;
+                    op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op2 for Vector<T>.Max/Min"));
+
+                    if ((intrinsic == NI_VectorT128_Max) || (intrinsic == NI_VectorT256_Max))
+                    {
+                        intrinsic = isVectorT256 ? NI_VectorT256_GreaterThan : NI_VectorT128_GreaterThan;
+                    }
+                    else
+                    {
+                        intrinsic = isVectorT256 ? NI_VectorT256_LessThan : NI_VectorT128_LessThan;
+                    }
+
+                    // op1 = op1 > op2
+                    // -or-
+                    // op1 = op1 < op2
+                    op1 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2);
+
+                    // result = ConditionalSelect(op1, op1Dup, op2Dup)
+                    return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup);
+                }
+
+                case NI_VectorT128_op_Multiply:
+                {
+                    assert(baseType == TYP_INT);
+
+                    NamedIntrinsic hwIntrinsic = NI_Illegal;
+
+                    if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+                    {
+                        hwIntrinsic = NI_SSE41_MultiplyLow;
+                    }
+                    else
+                    {
+                        // op1Dup = op1
+                        GenTree* op1Dup;
+                        op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone op1 for Vector<T>.Multiply"));
+
+                        // op2Dup = op2
+                        GenTree* op2Dup;
+                        op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone op2 for Vector<T>.Multiply"));
+
+                        // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4)
+                        op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT),
+                                                         NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+
+                        // op2 = Sse2.ShiftRightLogical128BitLane(op2, 4)
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT),
+                                                         NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+
+                        // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32()
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize);
+
+                        // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0))
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
+                                                         NI_SSE2_Shuffle, baseType, simdSize);
+
+                        // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32()
+                        op1 =
+                            gtNewSimdAsHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize);
+
+                        // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0))
+                        op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
+                                                         NI_SSE2_Shuffle, baseType, simdSize);
+
+                        // result = Sse2.UnpackLow(op1, op2)
+                        hwIntrinsic = NI_SSE2_UnpackLow;
+                    }
+                    assert(hwIntrinsic != NI_Illegal);
+
+                    return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+                }
+#elif defined(TARGET_ARM64)
+                case NI_VectorT128_Max:
+                case NI_VectorT128_Min:
+                {
+                    assert((baseType == TYP_LONG) || (baseType == TYP_ULONG));
+
+                    NamedIntrinsic hwIntrinsic;
+
+                    GenTree* op1Dup;
+                    op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op1 for Vector<T>.Max/Min"));
+
+                    GenTree* op2Dup;
+                    op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op2 for Vector<T>.Max/Min"));
+
+                    intrinsic = (intrinsic == NI_VectorT128_Max) ? NI_VectorT128_GreaterThan : NI_VectorT128_LessThan;
+
+                    // op1 = op1 > op2
+                    // -or-
+                    // op1 = op1 < op2
+                    hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+                    op1         = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+
+                    // result = ConditionalSelect(op1, op1Dup, op2Dup)
+                    return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup);
+                }
+#else
+#error Unsupported platform
+#endif // TARGET_XARCH
+
+                default:
+                {
+                    // Some platforms warn about unhandled switch cases
+                    // We handle it more generally via the assert and nullptr return below.
+                    break;
+                }
+            }
+            break;
+        }
+    }
+
+    assert(!"Unexpected SimdAsHWIntrinsic");
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
+// impSimdAsHWIntrinsicCndSel: Import a SIMD conditional select intrinsic
+//
+// Arguments:
+//    clsHnd     -- class handle containing the intrinsic function.
+//    retType    -- the return type of the intrinsic call
+//    baseType   -- the base type of SIMD type of the intrinsic
+//    simdSize   -- the size of the SIMD type of the intrinsic
+//    op1        -- the first operand of the intrinsic (the selection mask)
+//    op2        -- the second operand of the intrinsic (kept where the mask bits are set)
+//    op3        -- the third operand of the intrinsic (kept where the mask bits are clear)
+//
+// Return Value:
+//    The GT_HWINTRINSIC node representing the conditional select, i.e. (op2 & op1) | (op3 & ~op1)
+//
+GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd,
+                                              var_types            retType,
+                                              var_types            baseType,
+                                              unsigned             simdSize,
+                                              GenTree*             op1,
+                                              GenTree*             op2,
+                                              GenTree*             op3)
+{
+    assert(featureSIMD);
+    assert(retType != TYP_UNKNOWN);
+    assert(varTypeIsIntegral(baseType));
+    assert(simdSize != 0);
+    assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize)));
+    assert(op1 != nullptr);
+    assert(op2 != nullptr);
+    assert(op3 != nullptr);
+
+#if defined(TARGET_XARCH)
+    bool isVectorT256 = (simdSize == 32);
+
+    // The Vector<T> conditional select covered here requires at least SSE2
+    assert(compIsaSupportedDebugOnly(InstructionSet_SSE2));
+
+    // Vector<T>, when 32-bytes, requires at least AVX2
+    assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+    if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+    {
+        NamedIntrinsic hwIntrinsic = NI_SSE41_BlendVariable;
+
+        if (isVectorT256)
+        {
+            hwIntrinsic = varTypeIsIntegral(baseType) ? NI_AVX2_BlendVariable : NI_AVX_BlendVariable;
+        }
+
+        return gtNewSimdAsHWIntrinsicNode(retType, op3, op2, op1, hwIntrinsic, baseType, simdSize); // mask (op1) last
+    }
+#endif // TARGET_XARCH
+
+    NamedIntrinsic hwIntrinsic;
+
+    GenTree* op1Dup;
+    op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                       nullptr DEBUGARG("Clone op1 for Vector<T>.ConditionalSelect"));
+
+    // op2 = op2 & op1
+    hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType);
+    op2         = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, hwIntrinsic, baseType, simdSize);
+
+    // op3 = op3 & ~op1Dup (operand order is flipped below if the AndNot entry is flagged NeedsOperandsSwapped)
+    hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_AndNot, baseType);
+
+    if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(NI_VectorT128_AndNot))
+    {
+        std::swap(op3, op1Dup);
+    }
+
+    op3 = gtNewSimdAsHWIntrinsicNode(retType, op3, op1Dup, hwIntrinsic, baseType, simdSize);
+
+    // result = op2 | op3
+    hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType);
+    return gtNewSimdAsHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize);
+}
+
+#if defined(TARGET_XARCH)
+//------------------------------------------------------------------------
+// impSimdAsHWIntrinsicRelOp: Import a SIMD relational operator intrinsic
+//
+// Arguments:
+//    intrinsic  -- id of the relational intrinsic (Equals, GreaterThan[OrEqual] or LessThan[OrEqual]).
+//    clsHnd     -- class handle containing the intrinsic function.
+//    retType    -- the return type of the intrinsic call
+//    baseType   -- the base type of SIMD type of the intrinsic (must be integral)
+//    simdSize   -- the size of the SIMD type of the intrinsic
+//    op1        -- the first operand of the intrinsic
+//    op2        -- the second operand of the intrinsic
+//
+// Return Value:
+//    The GT_HWINTRINSIC node representing the relational operator
+//
+GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic       intrinsic,
+                                             CORINFO_CLASS_HANDLE clsHnd,
+                                             var_types            retType,
+                                             var_types            baseType,
+                                             unsigned             simdSize,
+                                             GenTree*             op1,
+                                             GenTree*             op2)
+{
+    assert(featureSIMD);
+    assert(retType != TYP_UNKNOWN);
+    assert(varTypeIsIntegral(baseType));
+    assert(simdSize != 0);
+    assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize)));
+    assert(op1 != nullptr);
+    assert(op2 != nullptr);
+    assert(!SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic));
+
+    bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256);
+
+    // Vector<T> for the rel-ops covered here requires at least SSE2
+    assert(compIsaSupportedDebugOnly(InstructionSet_SSE2));
+
+    // Vector<T>, when 32-bytes, requires at least AVX2
+    assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+    switch (intrinsic)
+    {
+        case NI_VectorT128_Equals:
+        case NI_VectorT256_Equals:
+        {
+            // These ones aren't "special", but they are used by the other
+            // relational operators and so are defined for convenience.
+
+            NamedIntrinsic hwIntrinsic = NI_Illegal;
+
+            if (isVectorT256 || ((baseType != TYP_LONG) && (baseType != TYP_ULONG)))
+            {
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+                assert(hwIntrinsic != intrinsic);
+            }
+            else if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+            {
+                hwIntrinsic = NI_SSE41_CompareEqual;
+            }
+            else
+            {
+                // There is no direct SSE2 support for comparing TYP_LONG vectors.
+                // These have to be implemented in terms of TYP_INT vector comparison operations.
+                //
+                // tmp = (op1 == op2) i.e. compare for equality as if op1 and op2 are Vector<int>
+                // op1 = tmp
+                // op2 = Shuffle(tmp, (2, 3, 0, 1))
+                // result = BitwiseAnd(op1, op2)
+                //
+                // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of
+                // respective long elements.
+
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT);
+                assert(hwIntrinsic != intrinsic);
+
+                GenTree* tmp = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize);
+
+                tmp = impCloneExpr(tmp, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                   nullptr DEBUGARG("Clone tmp for Vector<T>.Equals"));
+
+                op2 = gtNewSimdAsHWIntrinsicNode(retType, tmp, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle,
+                                                 TYP_INT, simdSize);
+
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType);
+                assert(hwIntrinsic != NI_VectorT128_op_BitwiseAnd);
+            }
+            assert(hwIntrinsic != NI_Illegal);
+
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
+
+        case NI_VectorT128_GreaterThanOrEqual:
+        case NI_VectorT128_LessThanOrEqual:
+        case NI_VectorT256_GreaterThanOrEqual:
+        case NI_VectorT256_LessThanOrEqual:
+        {
+            // There is no direct support for doing a combined comparison and equality for integral types.
+            // These have to be implemented by performing both halves and combining their results.
+            //
+            // op1Dup = op1
+            // op2Dup = op2
+            //
+            // op1 = Equals(op1, op2)
+            // op2 = GreaterThan(op1Dup, op2Dup)
+            //
+            // result = BitwiseOr(op1, op2)
+            //
+            // Where the GreaterThan(op1Dup, op2Dup) comparison could also be LessThan(op1Dup, op2Dup)
+
+            GenTree* op1Dup;
+            op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                               nullptr DEBUGARG("Clone op1 for Vector<T>.GreaterThanOrEqual/LessThanOrEqual"));
+
+            GenTree* op2Dup;
+            op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                               nullptr DEBUGARG("Clone op2 for Vector<T>.GreaterThanOrEqual/LessThanOrEqual"));
+
+            NamedIntrinsic eqIntrinsic = isVectorT256 ? NI_VectorT256_Equals : NI_VectorT128_Equals;
+
+            switch (intrinsic)
+            {
+                case NI_VectorT128_GreaterThanOrEqual:
+                {
+                    intrinsic = NI_VectorT128_GreaterThan;
+                    break;
+                }
+
+                case NI_VectorT128_LessThanOrEqual:
+                {
+                    intrinsic = NI_VectorT128_LessThan;
+                    break;
+                }
+
+                case NI_VectorT256_GreaterThanOrEqual:
+                {
+                    intrinsic = NI_VectorT256_GreaterThan;
+                    break;
+                }
+
+                case NI_VectorT256_LessThanOrEqual:
+                {
+                    intrinsic = NI_VectorT256_LessThan;
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            op1       = impSimdAsHWIntrinsicRelOp(eqIntrinsic, clsHnd, retType, baseType, simdSize, op1, op2);
+            op2       = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1Dup, op2Dup);
+            intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr;
+
+            NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
+
+        case NI_VectorT128_GreaterThan:
+        case NI_VectorT128_LessThan:
+        case NI_VectorT256_GreaterThan:
+        case NI_VectorT256_LessThan:
+        {
+            NamedIntrinsic hwIntrinsic = NI_Illegal;
+
+            if (varTypeIsUnsigned(baseType))
+            {
+                // Vector<byte>, Vector<ushort>, Vector<uint> and Vector<ulong>:
+                // Hardware supports > for signed comparison. Therefore, to use it for
+                // comparing unsigned numbers, we subtract a constant from both the
+                // operands such that the result fits within the corresponding signed
+                // type. The resulting signed numbers are compared using signed comparison.
+                //
+                // Vector<byte>: constant to be subtracted is 2^7
+                // Vector<ushort> constant to be subtracted is 2^15
+                // Vector<uint> constant to be subtracted is 2^31
+                // Vector<ulong> constant to be subtracted is 2^63
+                //
+                // We need to treat op1 and op2 as signed for comparison purpose after
+                // the transformation.
+
+                GenTree*  constVal = nullptr;
+                var_types opType   = baseType;
+
+                switch (baseType)
+                {
+                    case TYP_UBYTE:
+                    {
+                        constVal = gtNewIconNode(0x80808080, TYP_INT);
+                        baseType = TYP_BYTE;
+                        break;
+                    }
+
+                    case TYP_USHORT:
+                    {
+                        constVal = gtNewIconNode(0x80008000, TYP_INT);
+                        baseType = TYP_SHORT;
+                        break;
+                    }
+
+                    case TYP_UINT:
+                    {
+                        constVal = gtNewIconNode(0x80000000, TYP_INT);
+                        baseType = TYP_INT;
+                        break;
+                    }
+
+                    case TYP_ULONG:
+                    {
+                        constVal = gtNewLconNode(0x8000000000000000);
+                        baseType = TYP_LONG;
+                        break;
+                    }
+
+                    default:
+                    {
+                        unreached();
+                    }
+                }
+
+                GenTree* constVector =
+                    gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize);
+
+                GenTree* constVectorDup;
+                constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone constVector for Vector<T>.GreaterThan/LessThan"));
+
+                NamedIntrinsic subIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract;
+
+                // op1 = op1 - constVector
+                op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, subIntrinsic, opType, simdSize);
+
+                // op2 = op2 - constVector
+                op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, subIntrinsic, opType, simdSize);
+            }
+
+            // This should have been mutated by the above path
+            assert(varTypeIsIntegral(baseType) && !varTypeIsUnsigned(baseType));
+
+            if (isVectorT256 || (baseType != TYP_LONG))
+            {
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType);
+                assert(hwIntrinsic != intrinsic);
+            }
+            else if (compOpportunisticallyDependsOn(InstructionSet_SSE42))
+            {
+                hwIntrinsic =
+                    (intrinsic == NI_VectorT128_GreaterThan) ? NI_SSE42_CompareGreaterThan : NI_SSE42_CompareLessThan;
+            }
+            else
+            {
+                // There is no direct SSE2 support for comparing TYP_LONG vectors.
+                // These have to be implemented in terms of TYP_INT vector comparison operations.
+                //
+                // Let us consider the case of single long element comparison.
+                // Say op1 = (x1, y1) and op2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the
+                // longs op1 and op2.
+                //
+                // GreaterThan(op1, op2) can be expressed in terms of > relationship between 32-bit integers that
+                // comprise op1 and op2 as
+                //                    =  (x1, y1) > (x2, y2)
+                //                    =  (x1 > x2) || [(x1 == x2) && (y1 > y2)]   - eq (1)
+                //
+                // op1Dup1 = op1
+                // op1Dup2 = op1Dup1
+                // op2Dup1 = op2
+                // op2Dup2 = op2Dup1
+                //
+                // t = (op1 > op2)                - 32-bit signed comparison
+                // u = (op1Dup1 == op2Dup1)       - 32-bit equality comparison
+                // v = (op1Dup2 > op2Dup2)        - 32-bit unsigned comparison
+                //
+                // op1 = Shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above
+                // v = Shuffle(v, (2, 2, 0, 0))   - This corresponds to (y1 > y2) in eq(1) above
+                // u = Shuffle(u, (3, 3, 1, 1))   - This corresponds to (x1 == x2) in eq(1) above
+                // op2 = BitwiseAnd(v, u)         - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above
+                //
+                // result = BitwiseOr(op1, op2)
+
+                GenTree* op1Dup1;
+                op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                   nullptr DEBUGARG("Clone op1 for Vector<T>.GreaterThan/LessThan"));
+
+                GenTree* op1Dup2;
+                op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op1 for Vector<T>.GreaterThan/LessThan"));
+
+                GenTree* op2Dup1;
+                op2 = impCloneExpr(op2, &op2Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                   nullptr DEBUGARG("Clone op2 for Vector<T>.GreaterThan/LessThan"));
+
+                GenTree* op2Dup2;
+                op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                       nullptr DEBUGARG("Clone op2 Vector<T>.GreaterThan/LessThan"));
+
+                GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2);
+                GenTree* u = impSimdAsHWIntrinsicRelOp(NI_VectorT128_Equals, clsHnd, retType, TYP_INT, simdSize,
+                                                       op1Dup1, op2Dup1);
+                GenTree* v =
+                    impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2);
+
+                op1 = gtNewSimdAsHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
+                                                 TYP_INT, simdSize);
+
+                v = gtNewSimdAsHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
+                                               TYP_INT, simdSize);
+                u = gtNewSimdAsHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
+                                               TYP_INT, simdSize);
+
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType);
+                op2         = gtNewSimdAsHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize);
+
+                hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType);
+            }
+            assert(hwIntrinsic != NI_Illegal);
+
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
+
+        default:
+        {
+            assert(!"Unexpected SimdAsHWIntrinsic");
+            return nullptr;
+        }
+    }
+}
+#endif // TARGET_XARCH
+
+#endif // FEATURE_HW_INTRINSICS
diff --git a/src/coreclr/src/jit/simdashwintrinsic.h b/src/coreclr/src/jit/simdashwintrinsic.h
new file mode 100644 (file)
index 0000000..e5d951e
--- /dev/null
@@ -0,0 +1,130 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SIMD_AS_HWINTRINSIC_H_
+#define _SIMD_AS_HWINTRINSIC_H_
+
+enum class SimdAsHWIntrinsicClassId // The SIMD class an intrinsic belongs to (see SimdAsHWIntrinsicInfo::classId)
+{
+    Unknown,
+    Vector2,
+    Vector3,
+    Vector4,
+    VectorT128, // presumably Vector<T> when it is 16 bytes -- TODO confirm against lookupClassId
+    VectorT256, // Vector<T> when it is 32 bytes (the importer's isVectorT256 checks test for this id)
+};
+
+enum class SimdAsHWIntrinsicFlag : unsigned int // Bit flags stored in SimdAsHWIntrinsicInfo::flags
+{
+    None = 0, // No flag bits set; also the "bit not present" result in the flag queries
+
+    // Indicates compFloatingPointUsed does not need to be set (IsFloatingPointUsed returns false when set).
+    NoFloatingPointUsed = 0x1,
+
+    // Indicates the intrinsic is for an instance method (queried via IsInstanceMethod).
+    InstanceMethod = 0x02,
+
+    // Indicates the operands should be swapped in importation (queried via NeedsOperandsSwapped).
+    NeedsOperandsSwapped = 0x04,
+};
+
+inline SimdAsHWIntrinsicFlag operator~(SimdAsHWIntrinsicFlag value) // Bitwise complement of the flag bits
+{
+    return static_cast<SimdAsHWIntrinsicFlag>(~static_cast<unsigned int>(value));
+}
+
+inline SimdAsHWIntrinsicFlag operator|(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) // Bitwise OR of the flags
+{
+    return static_cast<SimdAsHWIntrinsicFlag>(static_cast<unsigned int>(lhs) | static_cast<unsigned int>(rhs));
+}
+
+inline SimdAsHWIntrinsicFlag operator&(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) // Bitwise AND of the flags
+{
+    return static_cast<SimdAsHWIntrinsicFlag>(static_cast<unsigned int>(lhs) & static_cast<unsigned int>(rhs));
+}
+
+inline SimdAsHWIntrinsicFlag operator^(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) // Bitwise XOR of the flags
+{
+    return static_cast<SimdAsHWIntrinsicFlag>(static_cast<unsigned int>(lhs) ^ static_cast<unsigned int>(rhs));
+}
+
+struct SimdAsHWIntrinsicInfo // Per-intrinsic description record; the static helpers fetch fields via lookup(id)
+{
+    NamedIntrinsic           id;
+    const char*              name;
+    SimdAsHWIntrinsicClassId classId;
+    int                      numArgs;
+    NamedIntrinsic           hwIntrinsic[10]; // one slot per base type, indexed by (type - TYP_BYTE)
+    SimdAsHWIntrinsicFlag    flags;
+
+    static const SimdAsHWIntrinsicInfo& lookup(NamedIntrinsic id);
+
+    static NamedIntrinsic lookupId(CORINFO_SIG_INFO* sig,
+                                   const char*       className,
+                                   const char*       methodName,
+                                   const char*       enclosingClassName,
+                                   int               sizeOfVectorT);
+    static SimdAsHWIntrinsicClassId lookupClassId(const char* className,
+                                                  const char* enclosingClassName,
+                                                  int         sizeOfVectorT);
+
+    // Member lookup: each helper returns one field of the entry found by lookup(id)
+
+    static NamedIntrinsic lookupId(NamedIntrinsic id)
+    {
+        return lookup(id).id;
+    }
+
+    static const char* lookupName(NamedIntrinsic id)
+    {
+        return lookup(id).name;
+    }
+
+    static SimdAsHWIntrinsicClassId lookupClassId(NamedIntrinsic id)
+    {
+        return lookup(id).classId;
+    }
+
+    static int lookupNumArgs(NamedIntrinsic id)
+    {
+        return lookup(id).numArgs;
+    }
+
+    static NamedIntrinsic lookupHWIntrinsic(NamedIntrinsic id, var_types type)
+    {
+        if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) // only types in [TYP_BYTE, TYP_DOUBLE] have a slot
+        {
+            assert(!"Unexpected type");
+            return NI_Illegal;
+        }
+        return lookup(id).hwIntrinsic[type - TYP_BYTE];
+    }
+
+    static SimdAsHWIntrinsicFlag lookupFlags(NamedIntrinsic id)
+    {
+        return lookup(id).flags;
+    }
+
+    // Flags lookup: each helper tests a single SimdAsHWIntrinsicFlag bit of the entry's flags
+
+    static bool IsFloatingPointUsed(NamedIntrinsic id) // true unless the NoFloatingPointUsed bit is set
+    {
+        SimdAsHWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & SimdAsHWIntrinsicFlag::NoFloatingPointUsed) == SimdAsHWIntrinsicFlag::None;
+    }
+
+    static bool IsInstanceMethod(NamedIntrinsic id)
+    {
+        SimdAsHWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & SimdAsHWIntrinsicFlag::InstanceMethod) == SimdAsHWIntrinsicFlag::InstanceMethod;
+    }
+
+    static bool NeedsOperandsSwapped(NamedIntrinsic id)
+    {
+        SimdAsHWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) == SimdAsHWIntrinsicFlag::NeedsOperandsSwapped;
+    }
+};
+
+#endif // _SIMD_AS_HWINTRINSIC_H_
diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h
new file mode 100644 (file)
index 0000000..cfd4793
--- /dev/null
@@ -0,0 +1,89 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef SIMD_AS_HWINTRINSIC
+#error Define SIMD_AS_HWINTRINSIC before including this file
+#endif
+/*****************************************************************************/
+
+// clang-format off
+
+#ifdef FEATURE_HW_INTRINSICS
+
+/* Note
+    * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic`
+    * Each intrinsic has a `NumArg` for number of parameters
+    * Each intrinsic has 10 `NamedIntrinsic` fields, one per base type, that list the HWIntrinsic that should be generated for that base type
+        * NI_Illegal is used to represent an unsupported type
+        * An entry that repeats the row's own Intrinsic ID (e.g. NI_VectorT128_Abs in the VectorT128 Abs row) indicates that special handling is required
+    * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag`
+*/
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                                                   Flags
+//                                                                      {TYP_BYTE,                                      TYP_UBYTE,                                      TYP_SHORT,                                      TYP_USHORT,                                     TYP_INT,                                        TYP_UINT,                                       TYP_LONG,                                       TYP_ULONG,                                      TYP_FLOAT,                                      TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector2 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector2,     Abs,                        1,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Abs,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     Max,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     Min,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Addition,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Division,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Arm64_Divide,                        NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Multiply,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Multiply,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Subtraction,             2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Subtract,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                                                   Flags
+//                                                                      {TYP_BYTE,                                      TYP_UBYTE,                                      TYP_SHORT,                                      TYP_USHORT,                                     TYP_INT,                                        TYP_UINT,                                       TYP_LONG,                                       TYP_ULONG,                                      TYP_FLOAT,                                      TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector3 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector3,     Abs,                        1,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Abs,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     Max,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     Min,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Addition,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Division,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Arm64_Divide,                        NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Multiply,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Multiply,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Subtraction,             2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Subtract,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                                                   Flags
+//                                                                      {TYP_BYTE,                                      TYP_UBYTE,                                      TYP_SHORT,                                      TYP_USHORT,                                     TYP_INT,                                        TYP_UINT,                                       TYP_LONG,                                       TYP_ULONG,                                      TYP_FLOAT,                                      TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector4 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector4,     Abs,                        1,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Abs,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     Max,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     Min,                        2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Addition,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Division,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Arm64_Divide,                        NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Multiply,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Multiply,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Subtraction,             2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Subtract,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                                                   Flags
+//                                                                      {TYP_BYTE,                                      TYP_UBYTE,                                      TYP_SHORT,                                      TYP_USHORT,                                     TYP_INT,                                        TYP_UINT,                                       TYP_LONG,                                       TYP_ULONG,                                      TYP_FLOAT,                                      TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector<T> Intrinsics
+SIMD_AS_HWINTRINSIC(VectorT128,  Abs,                        1,         {NI_AdvSimd_Abs,                                NI_VectorT128_Abs,                              NI_AdvSimd_Abs,                                 NI_VectorT128_Abs,                              NI_AdvSimd_Abs,                                 NI_VectorT128_Abs,                              NI_AdvSimd_Arm64_Abs,                           NI_VectorT128_Abs,                              NI_AdvSimd_Abs,                                 NI_AdvSimd_Arm64_Abs},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  AndNot,                     2,         {NI_AdvSimd_BitwiseClear,                       NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear,                        NI_AdvSimd_BitwiseClear},                       SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  Equals,                     2,         {NI_AdvSimd_CompareEqual,                       NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_Arm64_CompareEqual,                  NI_AdvSimd_Arm64_CompareEqual,                  NI_AdvSimd_CompareEqual,                        NI_AdvSimd_Arm64_CompareEqual},                 SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  GreaterThan,                2,         {NI_AdvSimd_CompareGreaterThan,                 NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_Arm64_CompareGreaterThan,            NI_AdvSimd_Arm64_CompareGreaterThan,            NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_Arm64_CompareGreaterThan},           SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  GreaterThanOrEqual,         2,         {NI_AdvSimd_CompareGreaterThanOrEqual,          NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_Arm64_CompareGreaterThanOrEqual,     NI_AdvSimd_Arm64_CompareGreaterThanOrEqual,     NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_Arm64_CompareGreaterThanOrEqual},    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  LessThan,                   2,         {NI_AdvSimd_CompareLessThan,                    NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_Arm64_CompareLessThan,               NI_AdvSimd_Arm64_CompareLessThan,               NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_Arm64_CompareLessThan},              SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  LessThanOrEqual,            2,         {NI_AdvSimd_CompareLessThanOrEqual,             NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_Arm64_CompareLessThanOrEqual,        NI_AdvSimd_Arm64_CompareLessThanOrEqual,        NI_AdvSimd_CompareLessThanOrEqual,              NI_AdvSimd_Arm64_CompareLessThanOrEqual},       SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  Max,                        2,         {NI_AdvSimd_Max,                                NI_AdvSimd_Max,                                 NI_AdvSimd_Max,                                 NI_AdvSimd_Max,                                 NI_AdvSimd_Max,                                 NI_AdvSimd_Max,                                 NI_VectorT128_Max,                              NI_VectorT128_Max,                              NI_AdvSimd_Max,                                 NI_AdvSimd_Arm64_Max},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  Min,                        2,         {NI_AdvSimd_Min,                                NI_AdvSimd_Min,                                 NI_AdvSimd_Min,                                 NI_AdvSimd_Min,                                 NI_AdvSimd_Min,                                 NI_AdvSimd_Min,                                 NI_VectorT128_Min,                              NI_VectorT128_Min,                              NI_AdvSimd_Min,                                 NI_AdvSimd_Arm64_Min},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Addition,                2,         {NI_AdvSimd_Add,                                NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Add,                                 NI_AdvSimd_Arm64_Add},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_BitwiseAnd,              2,         {NI_AdvSimd_And,                                NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And,                                 NI_AdvSimd_And},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_BitwiseOr,               2,         {NI_AdvSimd_Or,                                 NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or,                                  NI_AdvSimd_Or},                                 SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Division,                2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Arm64_Divide,                        NI_AdvSimd_Arm64_Divide},                       SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_ExclusiveOr,             2,         {NI_AdvSimd_Xor,                                NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor,                                 NI_AdvSimd_Xor},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Multiply,                2,         {NI_AdvSimd_Multiply,                           NI_AdvSimd_Multiply,                            NI_AdvSimd_Multiply,                            NI_AdvSimd_Multiply,                            NI_AdvSimd_Multiply,                            NI_AdvSimd_Multiply,                            NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Multiply,                            NI_AdvSimd_Arm64_Multiply},                     SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Subtraction,             2,         {NI_AdvSimd_Subtract,                           NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Arm64_Subtract},                     SimdAsHWIntrinsicFlag::None)
+
+#endif // FEATURE_HW_INTRINSICS
+
+#undef SIMD_AS_HWINTRINSIC
+
+// clang-format on
diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h
new file mode 100644 (file)
index 0000000..8f2ac62
--- /dev/null
@@ -0,0 +1,111 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef SIMD_AS_HWINTRINSIC
+#error Define SIMD_AS_HWINTRINSIC before including this file
+#endif
+/*****************************************************************************/
+
+// clang-format off
+
+#ifdef FEATURE_HW_INTRINSICS
+
+/* Note
+    * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic`
+    * Each intrinsic has a `NumArg` for number of parameters
+    * Each intrinsic has 10 `NamedIntrinsic` fields that list the HWIntrinsic that should be generated based on the base type
+        * NI_Illegal is used to represent an unsupported type
+        * An entry that is the same Intrinsic ID as the one the row itself represents indicates that special handling is required
+    * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag`
+*/
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                           Flags
+//                                                                      {TYP_BYTE,                                  TYP_UBYTE,                                  TYP_SHORT,                                  TYP_USHORT,                                 TYP_INT,                                    TYP_UINT,                                   TYP_LONG,                                   TYP_ULONG,                                  TYP_FLOAT,                                  TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector2 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector2,     Abs,                        1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector2_Abs,                             NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     Max,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     Min,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Addition,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Division,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector2_op_Division,                     NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Multiply,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Multiply,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector2,     op_Subtraction,             2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Subtract,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                           Flags
+//                                                                      {TYP_BYTE,                                  TYP_UBYTE,                                  TYP_SHORT,                                  TYP_USHORT,                                 TYP_INT,                                    TYP_UINT,                                   TYP_LONG,                                   TYP_ULONG,                                  TYP_FLOAT,                                  TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector3 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector3,     Abs,                        1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector3_Abs,                             NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     Max,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     Min,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Addition,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Division,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector3_op_Division,                     NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Multiply,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Multiply,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector3,     op_Subtraction,             2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Subtract,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                           Flags
+//                                                                      {TYP_BYTE,                                  TYP_UBYTE,                                  TYP_SHORT,                                  TYP_USHORT,                                 TYP_INT,                                    TYP_UINT,                                   TYP_LONG,                                   TYP_ULONG,                                  TYP_FLOAT,                                  TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector4 Intrinsics
+SIMD_AS_HWINTRINSIC(Vector4,     Abs,                        1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector4_Abs,                             NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     Max,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     Min,                        2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Addition,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Division,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Divide,                              NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Multiply,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Multiply,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(Vector4,     op_Subtraction,             2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Subtract,                            NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                           Flags
+//                                                                      {TYP_BYTE,                                  TYP_UBYTE,                                  TYP_SHORT,                                  TYP_USHORT,                                 TYP_INT,                                    TYP_UINT,                                   TYP_LONG,                                   TYP_ULONG,                                  TYP_FLOAT,                                  TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector<T> Intrinsics (VectorT128 entries)
+SIMD_AS_HWINTRINSIC(VectorT128,  Abs,                        1,         {NI_VectorT128_Abs,                         NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs,                          NI_VectorT128_Abs},                         SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  AndNot,                     2,         {NI_SSE2_AndNot,                            NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE2_AndNot,                             NI_SSE_AndNot,                              NI_SSE2_AndNot},                            SimdAsHWIntrinsicFlag::NeedsOperandsSwapped)
+SIMD_AS_HWINTRINSIC(VectorT128,  Equals,                     2,         {NI_SSE2_CompareEqual,                      NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_VectorT128_Equals,                       NI_VectorT128_Equals,                       NI_SSE_CompareEqual,                        NI_SSE2_CompareEqual},                      SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  GreaterThan,                2,         {NI_SSE2_CompareGreaterThan,                NI_VectorT128_GreaterThan,                  NI_SSE2_CompareGreaterThan,                 NI_VectorT128_GreaterThan,                  NI_SSE2_CompareGreaterThan,                 NI_VectorT128_GreaterThan,                  NI_VectorT128_GreaterThan,                  NI_VectorT128_GreaterThan,                  NI_SSE_CompareGreaterThan,                  NI_SSE2_CompareGreaterThan},                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  GreaterThanOrEqual,         2,         {NI_VectorT128_GreaterThanOrEqual,          NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_SSE_CompareGreaterThanOrEqual,           NI_SSE2_CompareGreaterThanOrEqual},         SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  LessThan,                   2,         {NI_SSE2_CompareLessThan,                   NI_VectorT128_LessThan,                     NI_SSE2_CompareLessThan,                    NI_VectorT128_LessThan,                     NI_SSE2_CompareLessThan,                    NI_VectorT128_LessThan,                     NI_VectorT128_LessThan,                     NI_VectorT128_LessThan,                     NI_SSE_CompareLessThan,                     NI_SSE2_CompareLessThan},                   SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  LessThanOrEqual,            2,         {NI_VectorT128_LessThanOrEqual,             NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_VectorT128_LessThanOrEqual,              NI_SSE_CompareLessThanOrEqual,              NI_SSE2_CompareLessThanOrEqual},            SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  Max,                        2,         {NI_VectorT128_Max,                         NI_SSE2_Max,                                NI_SSE2_Max,                                NI_VectorT128_Max,                          NI_VectorT128_Max,                          NI_VectorT128_Max,                          NI_VectorT128_Max,                          NI_VectorT128_Max,                          NI_SSE_Max,                                 NI_SSE2_Max},                               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  Min,                        2,         {NI_VectorT128_Min,                         NI_SSE2_Min,                                NI_SSE2_Min,                                NI_VectorT128_Min,                          NI_VectorT128_Min,                          NI_VectorT128_Min,                          NI_VectorT128_Min,                          NI_VectorT128_Min,                          NI_SSE_Min,                                 NI_SSE2_Min},                               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Addition,                2,         {NI_SSE2_Add,                               NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE2_Add,                                NI_SSE_Add,                                 NI_SSE2_Add},                               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_BitwiseAnd,              2,         {NI_SSE2_And,                               NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE2_And,                                NI_SSE_And,                                 NI_SSE2_And},                               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_BitwiseOr,               2,         {NI_SSE2_Or,                                NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE2_Or,                                 NI_SSE_Or,                                  NI_SSE2_Or},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Division,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Divide,                              NI_SSE2_Divide},                            SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_ExclusiveOr,             2,         {NI_SSE2_Xor,                               NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE2_Xor,                                NI_SSE_Xor,                                 NI_SSE2_Xor},                               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Multiply,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_SSE2_MultiplyLow,                        NI_Illegal,                                 NI_VectorT128_op_Multiply,                  NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Multiply,                            NI_SSE2_Multiply},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT128,  op_Subtraction,             2,         {NI_SSE2_Subtract,                          NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE_Subtract,                            NI_SSE2_Subtract},                          SimdAsHWIntrinsicFlag::None)
+
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                  ISA          Function name               NumArg                                                                                                                                                                                                      Instructions                                                                                                                                                                                                                                           Flags
+//                                                                      {TYP_BYTE,                                  TYP_UBYTE,                                  TYP_SHORT,                                  TYP_USHORT,                                 TYP_INT,                                    TYP_UINT,                                   TYP_LONG,                                   TYP_ULONG,                                  TYP_FLOAT,                                  TYP_DOUBLE}
+// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  Vector<T> Intrinsics (VectorT256 entries)
+SIMD_AS_HWINTRINSIC(VectorT256,  Abs,                        1,         {NI_AVX2_Abs,                               NI_VectorT256_Abs,                          NI_AVX2_Abs,                                NI_VectorT256_Abs,                          NI_AVX2_Abs,                                NI_VectorT256_Abs,                          NI_VectorT256_Abs,                          NI_VectorT256_Abs,                          NI_VectorT256_Abs,                          NI_VectorT256_Abs},                         SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  AndNot,                     2,         {NI_AVX2_AndNot,                            NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX2_AndNot,                             NI_AVX_AndNot,                              NI_AVX_AndNot},                             SimdAsHWIntrinsicFlag::NeedsOperandsSwapped)
+SIMD_AS_HWINTRINSIC(VectorT256,  Equals,                     2,         {NI_AVX2_CompareEqual,                      NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX_CompareEqual,                        NI_AVX_CompareEqual},                       SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  GreaterThan,                2,         {NI_AVX2_CompareGreaterThan,                NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX_CompareGreaterThan,                  NI_AVX_CompareGreaterThan},                 SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  GreaterThanOrEqual,         2,         {NI_VectorT256_GreaterThanOrEqual,          NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_AVX_CompareGreaterThanOrEqual,           NI_AVX_CompareGreaterThanOrEqual},          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  LessThan,                   2,         {NI_AVX2_CompareLessThan,                   NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX_CompareLessThan,                     NI_AVX_CompareLessThan},                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  LessThanOrEqual,            2,         {NI_VectorT256_LessThanOrEqual,             NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_VectorT256_LessThanOrEqual,              NI_AVX_CompareLessThanOrEqual,              NI_AVX_CompareLessThanOrEqual},             SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  Max,                        2,         {NI_AVX2_Max,                               NI_AVX2_Max,                                NI_AVX2_Max,                                NI_AVX2_Max,                                NI_AVX2_Max,                                NI_AVX2_Max,                                NI_VectorT256_Max,                          NI_VectorT256_Max,                          NI_AVX_Max,                                 NI_AVX_Max},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  Min,                        2,         {NI_AVX2_Min,                               NI_AVX2_Min,                                NI_AVX2_Min,                                NI_AVX2_Min,                                NI_AVX2_Min,                                NI_AVX2_Min,                                NI_VectorT256_Min,                          NI_VectorT256_Min,                          NI_AVX_Min,                                 NI_AVX_Min},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_Addition,                2,         {NI_AVX2_Add,                               NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX2_Add,                                NI_AVX_Add,                                 NI_AVX_Add},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_BitwiseAnd,              2,         {NI_AVX2_And,                               NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX2_And,                                NI_AVX_And,                                 NI_AVX_And},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_BitwiseOr,               2,         {NI_AVX2_Or,                                NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX2_Or,                                 NI_AVX_Or,                                  NI_AVX_Or},                                 SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_Division,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_AVX_Divide,                              NI_AVX_Divide},                             SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_ExclusiveOr,             2,         {NI_AVX2_Xor,                               NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX2_Xor,                                NI_AVX_Xor,                                 NI_AVX_Xor},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_Multiply,                2,         {NI_Illegal,                                NI_Illegal,                                 NI_AVX2_MultiplyLow,                        NI_Illegal,                                 NI_AVX2_MultiplyLow,                        NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_AVX_Multiply,                            NI_AVX_Multiply},                           SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC(VectorT256,  op_Subtraction,             2,         {NI_AVX2_Subtract,                          NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX_Subtract,                            NI_AVX_Subtract},                           SimdAsHWIntrinsicFlag::None)
+
+#endif // FEATURE_HW_INTRINSICS
+
+#undef SIMD_AS_HWINTRINSIC
+
+// clang-format on
index b2d5834..27d3469 100644 (file)
@@ -211,7 +211,6 @@ namespace System.Numerics
         /// <param name="left">The scalar value.</param>
         /// <param name="right">The source vector.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector2 operator *(float left, Vector2 right)
         {
@@ -224,7 +223,6 @@ namespace System.Numerics
         /// <param name="left">The source vector.</param>
         /// <param name="right">The scalar value.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector2 operator *(Vector2 left, float right)
         {
index c41baa4..df32e83 100644 (file)
@@ -230,7 +230,6 @@ namespace System.Numerics
         /// <param name="left">The source vector.</param>
         /// <param name="right">The scalar value.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector3 operator *(Vector3 left, float right)
         {
@@ -243,7 +242,6 @@ namespace System.Numerics
         /// <param name="left">The scalar value.</param>
         /// <param name="right">The source vector.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector3 operator *(float left, Vector3 right)
         {
index 440c788..70d6924 100644 (file)
@@ -265,7 +265,6 @@ namespace System.Numerics
         /// <param name="left">The source vector.</param>
         /// <param name="right">The scalar value.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector4 operator *(Vector4 left, float right)
         {
@@ -278,7 +277,6 @@ namespace System.Numerics
         /// <param name="left">The scalar value.</param>
         /// <param name="right">The source vector.</param>
         /// <returns>The scaled vector.</returns>
-        [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector4 operator *(float left, Vector4 right)
         {