Marking SSE HWIntrinsics as fully implemented
[platform/upstream/coreclr.git] / src / jit / hwintrinsicxarch.cpp
index fcf0f44..d8ac1ae 100644 (file)
@@ -8,13 +8,20 @@
 
 struct HWIntrinsicInfo
 {
-    NamedIntrinsic intrinsicID;
-    const char*    intrinsicName;
-    InstructionSet isa;
-}
+    NamedIntrinsic      intrinsicID;
+    const char*         intrinsicName;
+    InstructionSet      isa;
+    int                 ival;
+    unsigned            simdSize;
+    int                 numArgs;
+    instruction         ins[10];
+    HWIntrinsicCategory category;
+    HWIntrinsicFlag     flags;
+};
 
-static const hwIntrinsicInfoArray[] = {
-#define HARDWARE_INTRINSIC(id, name, isa) {NI_##id, name, InstructionSet_##isa},
+static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
+#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
+    {NI_##id, name, InstructionSet_##isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag},
 #include "hwintrinsiclistxarch.h"
 };
 
@@ -23,27 +30,6 @@ extern const char* getHWIntrinsicName(NamedIntrinsic intrinsic)
     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].intrinsicName;
 }
 
-static const bool isNumericType(var_types type)
-{
-    switch (type)
-    {
-        case TYP_BYTE:
-        case TYP_UBYTE:
-        case TYP_SHORT:
-        case TYP_USHORT:
-        case TYP_INT:
-        case TYP_UINT:
-        case TYP_LONG:
-        case TYP_ULONG:
-        case TYP_FLOAT:
-        case TYP_DOUBLE:
-            return true;
-
-        default:
-            return false;
-    }
-}
-
 //------------------------------------------------------------------------
 // lookupHWIntrinsicISA: map class name to InstructionSet value
 //
@@ -174,37 +160,139 @@ InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
 }
 
 //------------------------------------------------------------------------
-// isIntrinsicAnIsSupportedPropertyGetter: return true if the intrinsic is "get_IsSupported"
+// ivalOfHWIntrinsic: get the imm8 value of this intrinsic from the hwIntrinsicInfoArray table
 //
 // Arguments:
 //    intrinsic -- id of the intrinsic function.
 //
 // Return Value:
-//    true if the intrinsic is "get_IsSupported"
-//    Sometimes we need to specially treat "get_IsSupported"
-bool Compiler::isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic)
+//     The imm8 value that is implicit for this intrinsic, or -1 for intrinsics that do not take an immediate, or for
+//     which the immediate is an explicit argument.
+//
+int Compiler::ivalOfHWIntrinsic(NamedIntrinsic intrinsic)
 {
-    switch (intrinsic)
+    assert(intrinsic != NI_Illegal);
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ival;
+}
+
+//------------------------------------------------------------------------
+// simdSizeOfHWIntrinsic: get the SIMD size of this intrinsic
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//     the SIMD size of this intrinsic
+//         - from the hwIntrinsicInfoArray table if intrinsic has NO HW_Flag_UnfixedSIMDSize
+//         - TODO-XArch-NYI - from the signature if intrinsic has HW_Flag_UnfixedSIMDSize
+//
+// Note - this function is only used by the importer
+//        after importation (i.e., codegen), we can get the SIMD size from GenTreeHWIntrinsic IR
+static unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig)
+{
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    assert((hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags & HW_Flag_UnfixedSIMDSize) == 0);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].simdSize;
+}
+
+//------------------------------------------------------------------------
+// numArgsOfHWIntrinsic: get the number of arguments
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//     number of arguments
+//
+int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic)
+{
+    assert(intrinsic != NI_Illegal);
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
+}
+
+//------------------------------------------------------------------------
+// insOfHWIntrinsic: get the instruction of the given intrinsic
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//    type      -- vector base type of this intrinsic
+//
+// Return Value:
+//     the instruction of the given intrinsic on the base type
+//     return INS_invalid for unsupported base types
+//
+instruction Compiler::insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type)
+{
+    assert(intrinsic != NI_Illegal);
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    assert(type >= TYP_BYTE && type <= TYP_DOUBLE);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ins[type - TYP_BYTE];
+}
+
+//------------------------------------------------------------------------
+// categoryOfHWIntrinsic: get the category of the given intrinsic
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//     the category of the given intrinsic
+//
+HWIntrinsicCategory Compiler::categoryOfHWIntrinsic(NamedIntrinsic intrinsic)
+{
+    assert(intrinsic != NI_Illegal);
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].category;
+}
+
+//------------------------------------------------------------------------
+// HWIntrinsicFlag: get the flags of the given intrinsic
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//     the flags of the given intrinsic
+//
+HWIntrinsicFlag Compiler::flagsOfHWIntrinsic(NamedIntrinsic intrinsic)
+{
+    assert(intrinsic != NI_Illegal);
+    assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags;
+}
+
+//------------------------------------------------------------------------
+// getArgForHWIntrinsic: get the argument from the stack and match  the signature
+//
+// Arguments:
+//    argType   -- the required type of argument
+//    argClass  -- the class handle of argType
+//
+// Return Value:
+//     get the argument at the given index from the stack and match  the signature
+//
+GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
+{
+    GenTree* arg = nullptr;
+    if (argType == TYP_STRUCT)
     {
-        case NI_SSE_IsSupported:
-        case NI_SSE2_IsSupported:
-        case NI_SSE3_IsSupported:
-        case NI_SSSE3_IsSupported:
-        case NI_SSE41_IsSupported:
-        case NI_SSE42_IsSupported:
-        case NI_AVX_IsSupported:
-        case NI_AVX2_IsSupported:
-        case NI_AES_IsSupported:
-        case NI_BMI1_IsSupported:
-        case NI_BMI2_IsSupported:
-        case NI_FMA_IsSupported:
-        case NI_LZCNT_IsSupported:
-        case NI_PCLMULQDQ_IsSupported:
-        case NI_POPCNT_IsSupported:
-            return true;
-        default:
-            return false;
+        unsigned int argSizeBytes;
+        var_types    base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
+        argType           = getSIMDTypeForSize(argSizeBytes);
+        assert(argType == TYP_SIMD32 || argType == TYP_SIMD16);
+        arg = impSIMDPopStack(argType);
+        assert(arg->TypeGet() == TYP_SIMD16 || arg->TypeGet() == TYP_SIMD32);
+    }
+    else
+    {
+        assert(varTypeIsArithmetic(argType));
+        arg = impPopStack().val;
+        assert(varTypeIsArithmetic(arg->TypeGet()));
+        assert(genActualType(arg->gtType) == genActualType(argType));
     }
+    return arg;
 }
 
 //------------------------------------------------------------------------
@@ -220,7 +308,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
 {
     switch (isa)
     {
-        case InstructionSet_SSE:
         case InstructionSet_SSE2:
         case InstructionSet_SSE3:
         case InstructionSet_SSSE3:
@@ -235,6 +322,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
         case InstructionSet_PCLMULQDQ:
             return false;
 
+        case InstructionSet_SSE:
         case InstructionSet_LZCNT:
         case InstructionSet_POPCNT:
             return true;
@@ -286,6 +374,15 @@ bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
                                                     isFullyImplmentedISAClass(isa));
 }
 
+static bool isTypeSupportedForIntrinsic(var_types type)
+{
+#ifdef _TARGET_X86_
+    return !varTypeIsLong(type);
+#else
+    return true;
+#endif
+}
+
 //------------------------------------------------------------------------
 // impUnsupportedHWIntrinsic: returns a node for an unsupported HWIntrinsic
 //
@@ -331,8 +428,22 @@ GenTree* Compiler::impUnsupportedHWIntrinsic(unsigned              helper,
 }
 
 //------------------------------------------------------------------------
+// impIsTableDrivenHWIntrinsic:
+//
+// Arguments:
+//    category - category of a HW intrinsic
+//
+// Return Value:
+//    returns true if this category can be table-driven in the importer
+//
+static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
+{
+    // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
+    return category != HW_Category_Special && category != HW_Category_Scalar && (flags & HW_Flag_NoCodeGen) == 0;
+}
+
+//------------------------------------------------------------------------
 // impX86HWIntrinsic: dispatch hardware intrinsics to their own implementation
-// function
 //
 // Arguments:
 //    intrinsic -- id of the intrinsic function.
@@ -347,22 +458,151 @@ GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic        intrinsic,
                                      CORINFO_SIG_INFO*     sig,
                                      bool                  mustExpand)
 {
-    InstructionSet isa = isaOfHWIntrinsic(intrinsic);
+    InstructionSet      isa      = isaOfHWIntrinsic(intrinsic);
+    HWIntrinsicCategory category = categoryOfHWIntrinsic(intrinsic);
+    HWIntrinsicFlag     flags    = flagsOfHWIntrinsic(intrinsic);
+    int                 numArgs  = sig->numArgs;
+    var_types           retType  = JITtype2varType(sig->retType);
+    var_types           baseType = TYP_UNKNOWN;
+    if (retType == TYP_STRUCT && featureSIMD)
+    {
+        unsigned int sizeBytes;
+        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
+        retType  = getSIMDTypeForSize(sizeBytes);
+        assert(sizeBytes != 0 && baseType != TYP_UNKNOWN);
+    }
 
     // This intrinsic is supported if
     // - the ISA is available on the underlying hardware (compSupports returns true)
     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
-    bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa);
+    // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (isTypeSupportedForIntrinsic returns
+    // true)
+    bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa) && isTypeSupportedForIntrinsic(retType);
 
-    if (isIntrinsicAnIsSupportedPropertyGetter(intrinsic))
+    if (category == HW_Category_IsSupportedProperty)
     {
         return gtNewIconNode(issupported);
     }
+    // - calling to unsupported intrinsics must throw PlatforNotSupportedException
     else if (!issupported)
     {
         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
     }
+    else if (category == HW_Category_IMM)
+    {
+        GenTree* lastOp = impStackTop().val;
+        if (!lastOp->IsCnsIntOrI() && !mustExpand)
+        {
+            // When the imm-argument is not a constant and we are not being forced to expand, we need to
+            // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
+            // intrinsic method is recursive and will be forced to expand, at which point
+            // we emit some less efficient fallback code.
+            return nullptr;
+        }
+    }
 
+    if ((flags & HW_Flag_Generic) != 0)
+    {
+        assert(baseType != TYP_UNKNOWN);
+        // When the type argument is not a numeric type (and we are not being forced to expand), we need to
+        // return nullptr so a GT_CALL to the intrinsic method is emitted that will throw NotSupportedException
+        if (!varTypeIsArithmetic(baseType))
+        {
+            assert(!mustExpand);
+            return nullptr;
+        }
+
+        if ((flags & HW_Flag_TwoTypeGeneric) != 0)
+        {
+            // StaticCast<T, U> has two type parameters.
+            assert(!mustExpand);
+            assert(numArgs == 1);
+            var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+            assert(srcType != TYP_UNKNOWN);
+            if (!varTypeIsArithmetic(srcType))
+            {
+                return nullptr;
+            }
+        }
+    }
+
+    // table-driven importer of simple intrinsics
+    if (impIsTableDrivenHWIntrinsic(category, flags))
+    {
+        if (!varTypeIsSIMD(retType))
+        {
+            if (retType != TYP_VOID)
+            {
+                baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+            }
+            else
+            {
+                assert(category == HW_Category_MemoryStore);
+                baseType =
+                    getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, info.compCompHnd->getArgNext(sig->args)));
+            }
+
+            assert(baseType != TYP_UNKNOWN);
+        }
+
+        unsigned                simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
+        CORINFO_ARG_LIST_HANDLE argList  = sig->args;
+        CORINFO_CLASS_HANDLE    argClass;
+        var_types               argType = TYP_UNKNOWN;
+
+        assert(numArgs >= 0);
+        assert(insOfHWIntrinsic(intrinsic, baseType) != INS_invalid);
+        assert(simdSize == 32 || simdSize == 16);
+
+        GenTree* retNode = nullptr;
+        GenTree* op1     = nullptr;
+        GenTree* op2     = nullptr;
+
+        switch (numArgs)
+        {
+            case 0:
+                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
+                break;
+            case 1:
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+                op1     = getArgForHWIntrinsic(argType, argClass);
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
+                break;
+            case 2:
+                argType = JITtype2varType(
+                    strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(argList), &argClass)));
+                op2 = getArgForHWIntrinsic(argType, argClass);
+
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+                op1     = getArgForHWIntrinsic(argType, argClass);
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
+                break;
+
+            case 3:
+            {
+                CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
+                CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+
+                argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+                GenTree* op3 = getArgForHWIntrinsic(argType, argClass);
+
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+                op2     = getArgForHWIntrinsic(argType, argClass);
+
+                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+                op1     = getArgForHWIntrinsic(argType, argClass);
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
+                break;
+            }
+            default:
+                unreached();
+        }
+        return retNode;
+    }
+
+    // other intrinsics need special importation
     switch (isa)
     {
         case InstructionSet_SSE:
@@ -468,11 +708,13 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
                                    CORINFO_SIG_INFO*     sig,
                                    bool                  mustExpand)
 {
-    GenTree* retNode = nullptr;
-    GenTree* op1     = nullptr;
-    GenTree* op2     = nullptr;
-    GenTree* op3     = nullptr;
-    GenTree* op4     = nullptr;
+    GenTree* retNode  = nullptr;
+    GenTree* op1      = nullptr;
+    GenTree* op2      = nullptr;
+    GenTree* op3      = nullptr;
+    GenTree* op4      = nullptr;
+    int      simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
+    assert(simdSize == 16);
 
     switch (intrinsic)
     {
@@ -486,112 +728,14 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
             op2 = impPopStack().val;
             op1 = impPopStack().val;
 
-            GenTree* left    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16);
-            GenTree* right   = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16);
+            GenTree* left    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, simdSize);
+            GenTree* right   = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, simdSize);
             GenTree* control = gtNewIconNode(68, TYP_UBYTE);
 
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, simdSize);
             break;
         }
 
-        case NI_SSE_Shuffle:
-        {
-            assert(sig->numArgs == 3);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-
-            op3 = impStackTop().val;
-
-            if (op3->IsCnsIntOrI() || mustExpand)
-            {
-                impPopStack(); // Pop the value we peeked at
-                op2     = impSIMDPopStack(TYP_SIMD16);
-                op1     = impSIMDPopStack(TYP_SIMD16);
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsic, TYP_FLOAT, 16);
-            }
-            else
-            {
-                // When op3 is not a constant and we are not being forced to expand, we need to
-                // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
-                // intrinsic method is recursive and will be forced to expand, at which point
-                // we emit some less efficient fallback code.
-
-                return nullptr;
-            }
-            break;
-        }
-
-        case NI_SSE_Add:
-        case NI_SSE_AddScalar:
-        case NI_SSE_And:
-        case NI_SSE_AndNot:
-        case NI_SSE_CompareEqual:
-        case NI_SSE_CompareEqualScalar:
-        case NI_SSE_CompareGreaterThan:
-        case NI_SSE_CompareGreaterThanScalar:
-        case NI_SSE_CompareGreaterThanOrEqual:
-        case NI_SSE_CompareGreaterThanOrEqualScalar:
-        case NI_SSE_CompareLessThan:
-        case NI_SSE_CompareLessThanScalar:
-        case NI_SSE_CompareLessThanOrEqual:
-        case NI_SSE_CompareLessThanOrEqualScalar:
-        case NI_SSE_CompareNotEqual:
-        case NI_SSE_CompareNotEqualScalar:
-        case NI_SSE_CompareNotGreaterThan:
-        case NI_SSE_CompareNotGreaterThanScalar:
-        case NI_SSE_CompareNotGreaterThanOrEqual:
-        case NI_SSE_CompareNotGreaterThanOrEqualScalar:
-        case NI_SSE_CompareNotLessThan:
-        case NI_SSE_CompareNotLessThanScalar:
-        case NI_SSE_CompareNotLessThanOrEqual:
-        case NI_SSE_CompareNotLessThanOrEqualScalar:
-        case NI_SSE_CompareOrdered:
-        case NI_SSE_CompareOrderedScalar:
-        case NI_SSE_CompareUnordered:
-        case NI_SSE_CompareUnorderedScalar:
-        case NI_SSE_Divide:
-        case NI_SSE_DivideScalar:
-        case NI_SSE_Max:
-        case NI_SSE_MaxScalar:
-        case NI_SSE_Min:
-        case NI_SSE_MinScalar:
-        case NI_SSE_MoveHighToLow:
-        case NI_SSE_MoveLowToHigh:
-        case NI_SSE_MoveScalar:
-        case NI_SSE_Multiply:
-        case NI_SSE_MultiplyScalar:
-        case NI_SSE_Or:
-        case NI_SSE_Subtract:
-        case NI_SSE_SubtractScalar:
-        case NI_SSE_UnpackHigh:
-        case NI_SSE_UnpackLow:
-        case NI_SSE_Xor:
-            assert(sig->numArgs == 2);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-            op2     = impSIMDPopStack(TYP_SIMD16);
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
-            break;
-
-        case NI_SSE_CompareEqualOrderedScalar:
-        case NI_SSE_CompareEqualUnorderedScalar:
-        case NI_SSE_CompareGreaterThanOrderedScalar:
-        case NI_SSE_CompareGreaterThanUnorderedScalar:
-        case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
-        case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
-        case NI_SSE_CompareLessThanOrderedScalar:
-        case NI_SSE_CompareLessThanUnorderedScalar:
-        case NI_SSE_CompareLessThanOrEqualOrderedScalar:
-        case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
-        case NI_SSE_CompareNotEqualOrderedScalar:
-        case NI_SSE_CompareNotEqualUnorderedScalar:
-            assert(sig->numArgs == 2);
-            assert(JITtype2varType(sig->retType) == TYP_BOOL);
-            assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
-            op2     = impSIMDPopStack(TYP_SIMD16);
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_BOOL, op1, op2, intrinsic, TYP_FLOAT, 16);
-            break;
-
         case NI_SSE_ConvertToVector128SingleScalar:
         {
             assert(sig->numArgs == 2);
@@ -612,7 +756,7 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
 
             op2     = impPopStack().val;
             op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, simdSize);
             break;
         }
 
@@ -621,74 +765,15 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
             assert(JITtype2varType(sig->retType) == TYP_INT);
             assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
             op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, 16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
             break;
 
-        case NI_SSE_StaticCast:
-        {
-            assert(sig->numArgs == 1);
-            var_types tgtType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
-
-            if (isNumericType(tgtType) && isNumericType(srcType))
-            {
-                op1     = impSIMDPopStack(TYP_SIMD16);
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, tgtType, 16);
-            }
-            else
-            {
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-            break;
-        }
-
         case NI_SSE_SetAllVector128:
-        case NI_SSE_SetScalar:
             assert(sig->numArgs == 1);
             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
             op1     = impPopStack().val;
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
-            break;
-
-        case NI_SSE_Reciprocal:
-        case NI_SSE_ReciprocalScalar:
-        case NI_SSE_ReciprocalSqrt:
-        case NI_SSE_ReciprocalSqrtScalar:
-        case NI_SSE_Sqrt:
-        case NI_SSE_SqrtScalar:
-            assert(sig->numArgs == 1);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
-            break;
-
-        case NI_SSE_ConvertToInt32:
-        case NI_SSE_ConvertToInt32WithTruncation:
-        case NI_SSE_ConvertToInt64:
-        case NI_SSE_ConvertToInt64WithTruncation:
-        case NI_SSE_ConvertToSingle:
-        {
-            assert(sig->numArgs == 1);
-            assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
-            var_types callType = JITtype2varType(sig->retType);
-
-#ifdef _TARGET_X86_
-            if (varTypeIsLong(callType))
-            {
-                assert(intrinsic == NI_SSE_ConvertToInt64 || intrinsic == NI_SSE_ConvertToInt64WithTruncation);
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-#endif // _TARGET_X86_
-
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(callType, op1, intrinsic, TYP_FLOAT, 16);
-            break;
-        }
-
-        case NI_SSE_SetZeroVector128:
-            assert(sig->numArgs == 0);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, intrinsic, TYP_FLOAT, 16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtCloneExpr(op1), gtNewIconNode(0), NI_SSE_Shuffle,
+                                               TYP_FLOAT, simdSize);
             break;
 
         default:
@@ -709,14 +794,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
     var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
-        case NI_SSE2_Add:
-            assert(sig->numArgs == 2);
-            op2      = impSIMDPopStack(TYP_SIMD16);
-            op1      = impSIMDPopStack(TYP_SIMD16);
-            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_Add, baseType, 16);
-            break;
-
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
@@ -758,26 +835,17 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
     GenTree*  op2      = nullptr;
     var_types callType = JITtype2varType(sig->retType);
 
-    CORINFO_ARG_LIST_HANDLE argLst = sig->args;
+    CORINFO_ARG_LIST_HANDLE argList = sig->args;
     CORINFO_CLASS_HANDLE    argClass;
     CorInfoType             corType;
     switch (intrinsic)
     {
         case NI_SSE42_Crc32:
             assert(sig->numArgs == 2);
-
-#ifdef _TARGET_X86_
-            if (varTypeIsLong(callType))
-            {
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-#endif
-
-            op2 = impPopStack().val;
-            op1 = impPopStack().val;
-
-            argLst  = info.compCompHnd->getArgNext(argLst);                        // the second argument
-            corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); // type of the second argument
+            op2     = impPopStack().val;
+            op1     = impPopStack().val;
+            argList = info.compCompHnd->getArgNext(argList);                        // the second argument
+            corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
 
             retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, NI_SSE42_Crc32);
 
@@ -804,14 +872,6 @@ GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic        intrinsic,
     var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
-        case NI_AVX_Add:
-            assert(sig->numArgs == 2);
-            op2      = impSIMDPopStack(TYP_SIMD32);
-            op1      = impSIMDPopStack(TYP_SIMD32);
-            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX_Add, baseType, 32);
-            break;
-
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
@@ -830,14 +890,6 @@ GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic        intrinsic,
     var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
-        case NI_AVX2_Add:
-            assert(sig->numArgs == 2);
-            op2      = impSIMDPopStack(TYP_SIMD32);
-            op1      = impSIMDPopStack(TYP_SIMD32);
-            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX2_Add, baseType, 32);
-            break;
-
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
@@ -884,14 +936,6 @@ GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
 {
     assert(sig->numArgs == 1);
     var_types callType = JITtype2varType(sig->retType);
-
-#ifdef _TARGET_X86_
-    if (varTypeIsLong(callType))
-    {
-        return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-    }
-#endif
-
     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_LZCNT_LeadingZeroCount);
 }
 
@@ -910,14 +954,6 @@ GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
 {
     assert(sig->numArgs == 1);
     var_types callType = JITtype2varType(sig->retType);
-
-#ifdef _TARGET_X86_
-    if (varTypeIsLong(callType))
-    {
-        return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-    }
-#endif
-
     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_POPCNT_PopCount);
 }