Adding support for vector constants via GenTreeVecCon (#68874)
author Tanner Gooding <tagoo@outlook.com>
Tue, 31 May 2022 21:21:33 +0000 (14:21 -0700)
committer GitHub <noreply@github.com>
Tue, 31 May 2022 21:21:33 +0000 (14:21 -0700)
* Adding support for vector constants via GenTreeVecCon

* Responding to PR feedback

* Support tracking the underlying simdBaseJitType for GenTreeVecCon

* Applying formatting patch

* Handle the scenario where TYP_SIMD8 is retyped to TYP_LONG or TYP_DOUBLE

* Ensure GenTreeVecCon tracks the simdSize so that TYP_SIMD8 ABI retyping still works

* Ensure the create call and not the cloned local is lowered

* Resolving PR feedback

* Ensure morph and rationalize check `OBJ(ADDR(CNS_VEC))`

* Update src/coreclr/jit/gentree.cpp

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
45 files changed:
src/coreclr/jit/assertionprop.cpp
src/coreclr/jit/clrjit.natvis
src/coreclr/jit/codegenarm.cpp
src/coreclr/jit/codegenarm64.cpp
src/coreclr/jit/codegenarmarch.cpp
src/coreclr/jit/codegenlinear.cpp
src/coreclr/jit/codegenxarch.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/compiler.hpp
src/coreclr/jit/emit.cpp
src/coreclr/jit/emit.h
src/coreclr/jit/fgbasic.cpp
src/coreclr/jit/fginline.cpp
src/coreclr/jit/gentree.cpp
src/coreclr/jit/gentree.h
src/coreclr/jit/gtlist.h
src/coreclr/jit/gtstructs.h
src/coreclr/jit/hwintrinsicarm64.cpp
src/coreclr/jit/hwintrinsiccodegenarm64.cpp
src/coreclr/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/jit/hwintrinsiclistarm64.h
src/coreclr/jit/hwintrinsiclistxarch.h
src/coreclr/jit/hwintrinsicxarch.cpp
src/coreclr/jit/importer.cpp
src/coreclr/jit/importer_vectorization.cpp
src/coreclr/jit/instr.cpp
src/coreclr/jit/liveness.cpp
src/coreclr/jit/lower.cpp
src/coreclr/jit/lower.h
src/coreclr/jit/lowerarmarch.cpp
src/coreclr/jit/lowerloongarch64.cpp
src/coreclr/jit/lowerxarch.cpp
src/coreclr/jit/lsraarm64.cpp
src/coreclr/jit/lsrabuild.cpp
src/coreclr/jit/morph.cpp
src/coreclr/jit/morphblock.cpp
src/coreclr/jit/optcse.cpp
src/coreclr/jit/rationalize.cpp
src/coreclr/jit/simd.h
src/coreclr/jit/simdashwintrinsic.cpp
src/coreclr/jit/simdashwintrinsiclistarm64.h
src/coreclr/jit/simdashwintrinsiclistxarch.h
src/coreclr/jit/simdcodegenxarch.cpp
src/coreclr/jit/valuenum.cpp
src/coreclr/jit/valuenum.h

index f0a5386..b80c428 100644 (file)
@@ -2958,8 +2958,8 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree)
     ValueNumPair vnPair = tree->gtVNPair;
     ValueNum     vnCns  = vnStore->VNConservativeNormalValue(vnPair);
 
-    // Check if node evaluates to a constant or Vector.Zero.
-    if (!vnStore->IsVNConstant(vnCns) && !vnStore->IsVNVectorZero(vnCns))
+    // Check if node evaluates to a constant
+    if (!vnStore->IsVNConstant(vnCns))
     {
         return nullptr;
     }
@@ -3118,23 +3118,52 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree)
         }
         break;
 
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_SIMD
         case TYP_SIMD8:
+        {
+            simd8_t value = vnStore->ConstantValue<simd8_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd8Val    = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD12:
+        {
+            simd12_t value = vnStore->ConstantValue<simd12_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd12Val   = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD16:
+        {
+            simd16_t value = vnStore->ConstantValue<simd16_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd16Val   = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD32:
         {
-            assert(vnStore->IsVNVectorZero(vnCns));
-            VNSimdTypeInfo vnInfo = vnStore->GetVectorZeroSimdTypeOfVN(vnCns);
+            simd32_t value = vnStore->ConstantValue<simd32_t>(vnCns);
 
-            assert(vnInfo.m_simdBaseJitType != CORINFO_TYPE_UNDEF);
-            assert(vnInfo.m_simdSize != 0);
-            assert(getSIMDTypeForSize(vnInfo.m_simdSize) == vnStore->TypeOfVN(vnCns));
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd32Val   = value;
 
-            conValTree = gtNewSimdZeroNode(tree->TypeGet(), vnInfo.m_simdBaseJitType, vnInfo.m_simdSize, true);
+            conValTree = vecCon;
+            break;
         }
         break;
-#endif
+#endif // FEATURE_SIMD
 
         case TYP_BYREF:
             // Do not support const byref optimization.
@@ -5608,8 +5637,7 @@ struct VNAssertionPropVisitorInfo
 //
 GenTree* Compiler::optExtractSideEffListFromConst(GenTree* tree)
 {
-    assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)) ||
-           vnStore->IsVNVectorZero(vnStore->VNConservativeNormalValue(tree->gtVNPair)));
+    assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)));
 
     GenTree* sideEffList = nullptr;
 
index d3eee9a..5873fca 100644 (file)
@@ -50,6 +50,9 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
   <Type Name="GenTreeStrCon">
     <DisplayString>CNS_STR</DisplayString>
   </Type>
+  <Type Name="GenTreeVecCon">
+    <DisplayString>CNS_VEC</DisplayString>
+  </Type>
   <Type Name="GenTreeLngCon">
     <DisplayString>{gtTreeID, d}: [[LngCon={((GenTreeLngCon*)this)-&gt;gtLconVal, l}]</DisplayString>
   </Type>
index f3674d3..3a8c00b 100644 (file)
@@ -287,6 +287,11 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
         }
         break;
 
+        case GT_CNS_VEC:
+        {
+            unreached();
+        }
+
         default:
             unreached();
     }
index d03ac91..5fb190f 100644 (file)
@@ -2314,6 +2314,77 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
         }
         break;
 
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            emitter* emit = GetEmitter();
+            emitAttr attr = emitTypeSize(targetType);
+
+            switch (tree->TypeGet())
+            {
+#if defined(FEATURE_SIMD)
+                case TYP_LONG:
+                case TYP_DOUBLE:
+                case TYP_SIMD8:
+                {
+                    // TODO-1stClassStructs: do not retype SIMD nodes
+
+                    if (vecCon->IsAllBitsSet())
+                    {
+                        emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_2S);
+                    }
+                    else if (vecCon->IsZero())
+                    {
+                        emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_2S);
+                    }
+                    else
+                    {
+                        // Get a temp integer register to compute long address.
+                        regNumber addrReg = tree->GetSingleTempReg();
+
+                        simd8_t              constValue = vecCon->gtSimd8Val;
+                        CORINFO_FIELD_HANDLE hnd        = emit->emitSimd8Const(constValue);
+
+                        emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0);
+                    }
+                    break;
+                }
+
+                case TYP_SIMD12:
+                case TYP_SIMD16:
+                {
+                    if (vecCon->IsAllBitsSet())
+                    {
+                        emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_4S);
+                    }
+                    else if (vecCon->IsZero())
+                    {
+                        emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_4S);
+                    }
+                    else
+                    {
+                        // Get a temp integer register to compute long address.
+                        regNumber addrReg = tree->GetSingleTempReg();
+
+                        simd16_t             constValue = vecCon->gtSimd16Val;
+                        CORINFO_FIELD_HANDLE hnd        = emit->emitSimd16Const(constValue);
+
+                        emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0);
+                    }
+                    break;
+                }
+#endif // FEATURE_SIMD
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            break;
+        }
+
         default:
             unreached();
     }
@@ -2548,10 +2619,18 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
     }
     else if (data->isContained())
     {
-        assert(data->OperIs(GT_BITCAST));
-        const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
-        assert(!bitcastSrc->isContained());
-        dataReg = bitcastSrc->GetRegNum();
+        if (data->IsCnsVec())
+        {
+            assert(data->AsVecCon()->IsZero());
+            dataReg = REG_ZR;
+        }
+        else
+        {
+            assert(data->OperIs(GT_BITCAST));
+            const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
+            assert(!bitcastSrc->isContained());
+            dataReg = bitcastSrc->GetRegNum();
+        }
     }
     else
     {
@@ -2629,7 +2708,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
         if (data->isContained())
         {
             // This is only possible for a zero-init or bitcast.
-            const bool zeroInit = (data->IsIntegralConst(0) || data->IsSIMDZero());
+            const bool zeroInit = (data->IsIntegralConst(0) || data->IsVectorZero());
             assert(zeroInit || data->OperIs(GT_BITCAST));
 
             if (zeroInit && varTypeIsSIMD(targetType))
@@ -4249,7 +4328,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
         assert(!op1->isContained());
         assert(op1Type == op2Type);
 
-        if (op2->IsFPZero())
+        if (op2->IsFloatPositiveZero())
         {
             assert(op2->isContained());
             emit->emitIns_R_F(INS_fcmp, cmpSize, op1->GetRegNum(), 0.0);
@@ -5088,7 +5167,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
     if (op1->isContained())
     {
         // This is only possible for a zero-init.
-        assert(op1->IsIntegralConst(0) || op1->IsSIMDZero());
+        assert(op1->IsIntegralConst(0) || op1->IsVectorZero());
 
         // store lower 8 bytes
         GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, REG_ZR, varNum, offs);
index f1426fc..3ed9a59 100644 (file)
@@ -188,6 +188,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
 
         case GT_CNS_INT:
         case GT_CNS_DBL:
+        case GT_CNS_VEC:
             genSetRegToConst(targetReg, targetType, treeNode);
             genProduceReg(treeNode);
             break;
index 6212a52..d89217c 100644 (file)
@@ -1660,7 +1660,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
 #ifdef FEATURE_SIMD
             // (In)Equality operation that produces bool result, when compared
             // against Vector zero, marks its Vector Zero operand as contained.
-            assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero());
+            assert(tree->OperIsLeaf() || tree->IsVectorZero());
 #else
             assert(tree->OperIsLeaf());
 #endif
index bab2c05..97c884f 100644 (file)
@@ -451,8 +451,8 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
 /***********************************************************************************
  *
  * Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
+ * specified by the constant (GT_CNS_INT, GT_CNS_DBL, or GT_CNS_VEC) in 'tree'. This
+ * does not call genProduceReg() on the target register.
  */
 void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
 {
@@ -507,6 +507,78 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
         }
         break;
 
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            emitter* emit = GetEmitter();
+            emitAttr attr = emitTypeSize(targetType);
+
+            if (vecCon->IsAllBitsSet())
+            {
+#if defined(FEATURE_SIMD)
+                emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
+#else
+                emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
+#endif // FEATURE_SIMD
+                break;
+            }
+
+            if (vecCon->IsZero())
+            {
+#if defined(FEATURE_SIMD)
+                emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
+#else
+                emit->emitIns_R_R(INS_xorps, attr, targetReg, targetReg);
+#endif // FEATURE_SIMD
+                break;
+            }
+
+            switch (tree->TypeGet())
+            {
+#if defined(FEATURE_SIMD)
+                case TYP_LONG:
+                case TYP_DOUBLE:
+                case TYP_SIMD8:
+                {
+                    // TODO-1stClassStructs: do not retype SIMD nodes
+
+                    simd8_t              constValue = vecCon->gtSimd8Val;
+                    CORINFO_FIELD_HANDLE hnd        = emit->emitSimd8Const(constValue);
+
+                    emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+                    break;
+                }
+
+                case TYP_SIMD12:
+                case TYP_SIMD16:
+                {
+                    simd16_t             constValue = vecCon->gtSimd16Val;
+                    CORINFO_FIELD_HANDLE hnd        = emit->emitSimd16Const(constValue);
+
+                    emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+                    break;
+                }
+
+                case TYP_SIMD32:
+                {
+                    simd32_t             constValue = vecCon->gtSimd32Val;
+                    CORINFO_FIELD_HANDLE hnd        = emit->emitSimd32Const(constValue);
+
+                    emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+                    break;
+                }
+#endif // FEATURE_SIMD
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            break;
+        }
+
         default:
             unreached();
     }
@@ -1495,6 +1567,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             genProduceReg(treeNode);
             break;
 
+        case GT_CNS_VEC:
+            genSetRegToConst(targetReg, targetType, treeNode);
+            genProduceReg(treeNode);
+            break;
+
         case GT_NOT:
         case GT_NEG:
             genCodeForNegNot(treeNode);
@@ -4858,7 +4935,8 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
             // zero in the target register, because an xor is smaller than a copy. Note that we could
             // potentially handle this in the register allocator, but we can't always catch it there
             // because the target may not have a register allocated for it yet.
-            if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero()))
+            if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) &&
+                (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero()))
             {
                 op1->SetRegNum(REG_NA);
                 op1->ResetReuseRegVal();
index c519e04..869224e 100644 (file)
@@ -2283,16 +2283,18 @@ public:
 
     GenTree* gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle);
 
+    GenTreeVecCon* gtNewVconNode(var_types type, CorInfoType simdBaseJitType);
+
+    GenTree* gtNewAllBitsSetConNode(var_types type);
+    GenTree* gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType);
+
     GenTree* gtNewZeroConNode(var_types type);
+    GenTree* gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType);
 
     GenTree* gtNewOneConNode(var_types type);
 
     GenTreeLclVar* gtNewStoreLclVar(unsigned dstLclNum, GenTree* src);
 
-#ifdef FEATURE_SIMD
-    GenTree* gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize);
-#endif
-
     GenTree* gtNewBlkOpNode(GenTree* dst, GenTree* srcOrFillVal, bool isVolatile, bool isCopyBlock);
 
     GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg);
@@ -2557,11 +2559,6 @@ public:
                                       unsigned    simdSize,
                                       bool        isSimdAsHWIntrinsic);
 
-    GenTree* gtNewSimdZeroNode(var_types   type,
-                               CorInfoType simdBaseJitType,
-                               unsigned    simdSize,
-                               bool        isSimdAsHWIntrinsic);
-
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID);
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types      type,
@@ -8197,9 +8194,6 @@ private:
 
     SIMDHandlesCache* m_simdHandleCache;
 
-    // Get an appropriate "zero" for the given type and class handle.
-    GenTree* gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle);
-
     // Get the handle for a SIMD type.
     CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, CorInfoType simdBaseJitType)
     {
@@ -8280,7 +8274,6 @@ private:
             clsHnd = gtGetStructHandleForHWSIMD(simdType, simdBaseJitType);
         }
 
-        assert(clsHnd != NO_CLASS_HANDLE);
         return clsHnd;
     }
 #endif // FEATURE_HW_INTRINSICS
@@ -10665,6 +10658,7 @@ public:
             case GT_CNS_LNG:
             case GT_CNS_DBL:
             case GT_CNS_STR:
+            case GT_CNS_VEC:
             case GT_MEMORYBARRIER:
             case GT_JMP:
             case GT_JCC:
index c790e2b..bda0cc6 100644 (file)
@@ -4202,6 +4202,7 @@ void GenTree::VisitOperands(TVisitor visitor)
         case GT_CNS_LNG:
         case GT_CNS_DBL:
         case GT_CNS_STR:
+        case GT_CNS_VEC:
         case GT_MEMORYBARRIER:
         case GT_JMP:
         case GT_JCC:
index 9516058..e94c560 100644 (file)
@@ -7294,6 +7294,90 @@ CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(double constValue, emitAttr attr
     return emitComp->eeFindJitDataOffs(cnum);
 }
 
+//------------------------------------------------------------------------
+// emitSimd8Const: Create a simd8 data section constant.
+//
+// Arguments:
+//    constValue - constant value
+//
+// Return Value:
+//    A field handle representing the data offset to access the constant.
+//
+CORINFO_FIELD_HANDLE emitter::emitSimd8Const(simd8_t constValue)
+{
+    // Access to inline data is 'abstracted' by a special type of static member
+    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+    // to constant data, not a real static field.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+    unsigned cnsSize  = 8;
+    unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+    if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+    {
+        cnsAlign = dataSection::MIN_DATA_ALIGN;
+    }
+#endif // TARGET_XARCH
+
+    UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD8);
+    return emitComp->eeFindJitDataOffs(cnum);
+#else
+    unreached();
+#endif // !FEATURE_SIMD
+}
+
+CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
+{
+    // Access to inline data is 'abstracted' by a special type of static member
+    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+    // to constant data, not a real static field.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+    unsigned cnsSize  = 16;
+    unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+    if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+    {
+        cnsAlign = dataSection::MIN_DATA_ALIGN;
+    }
+#endif // TARGET_XARCH
+
+    UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD16);
+    return emitComp->eeFindJitDataOffs(cnum);
+#else
+    unreached();
+#endif // !FEATURE_SIMD
+}
+
+CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue)
+{
+    // Access to inline data is 'abstracted' by a special type of static member
+    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+    // to constant data, not a real static field.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+    unsigned cnsSize  = 32;
+    unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+    if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+    {
+        cnsAlign = dataSection::MIN_DATA_ALIGN;
+    }
+#endif // TARGET_XARCH
+
+    UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD32);
+    return emitComp->eeFindJitDataOffs(cnum);
+#else
+    unreached();
+#endif // !FEATURE_SIMD
+}
+
 /*****************************************************************************
  *
  *  Output the given data section at the specified address.
index b93fded..18d57d1 100644 (file)
@@ -1928,6 +1928,9 @@ public:
 
 private:
     CORINFO_FIELD_HANDLE emitFltOrDblConst(double constValue, emitAttr attr);
+    CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
+    CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue);
+    CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue);
     regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
     regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
     void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
index eff18c5..e69c309 100644 (file)
@@ -1213,22 +1213,12 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
                                 pushedStack.PushConstant();
                                 // TODO: check if it's a loop condition - we unroll such loops.
                                 break;
-                            case NI_Vector256_get_Zero:
-                            case NI_Vector256_get_AllBitsSet:
-                                foldableIntrinsic = true;
-                                pushedStack.PushUnknown();
-                                break;
 #elif defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
                             case NI_Vector64_get_Count:
                             case NI_Vector128_get_Count:
                                 foldableIntrinsic = true;
                                 pushedStack.PushConstant();
                                 break;
-                            case NI_Vector128_get_Zero:
-                            case NI_Vector128_get_AllBitsSet:
-                                foldableIntrinsic = true;
-                                pushedStack.PushUnknown();
-                                break;
 #endif
 
                             default:
index ae8119b..fe0746a 100644 (file)
@@ -310,8 +310,8 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call)
  */
 GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree)
 {
-    noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() ||
-                 tree->OperIsHWIntrinsic());
+    noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) ||
+                 tree->OperIsSimdOrHWintrinsic() || tree->IsCnsVec());
     // GT_CALL,     cannot get address of call.
     // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
     // GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
index ea690b3..fd02c38 100644 (file)
@@ -252,6 +252,7 @@ void GenTree::InitNodeSize()
     }
 
     GenTree::s_gtNodeSizes[GT_CALL]          = TREE_NODE_SZ_LARGE;
+    GenTree::s_gtNodeSizes[GT_CNS_VEC]       = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_CAST]          = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_FTN_ADDR]      = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_BOX]           = TREE_NODE_SZ_LARGE;
@@ -300,6 +301,7 @@ void GenTree::InitNodeSize()
     static_assert_no_msg(sizeof(GenTreeLngCon)       <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeDblCon)       <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeStrCon)       <= TREE_NODE_SZ_SMALL);
+    static_assert_no_msg(sizeof(GenTreeVecCon)       <= TREE_NODE_SZ_LARGE); // *** large node
     static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeLclVar)       <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeLclFld)       <= TREE_NODE_SZ_SMALL);
@@ -2352,6 +2354,15 @@ AGAIN:
                 }
                 break;
 
+            case GT_CNS_VEC:
+            {
+                if (GenTreeVecCon::Equals(op1->AsVecCon(), op2->AsVecCon()))
+                {
+                    return true;
+                }
+                break;
+            }
+
 #if 0
             // TODO-CQ: Enable this in the future
         case GT_CNS_LNG:
@@ -2815,6 +2826,57 @@ AGAIN:
                 add = tree->AsStrCon()->gtSconCPX;
                 break;
 
+            case GT_CNS_VEC:
+            {
+                GenTreeVecCon* vecCon = tree->AsVecCon();
+                add                   = 0;
+
+                switch (vecCon->TypeGet())
+                {
+#if defined(FEATURE_SIMD)
+                    case TYP_SIMD32:
+                    {
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[7]);
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[6]);
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[5]);
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[4]);
+                        FALLTHROUGH;
+                    }
+
+                    case TYP_SIMD16:
+                    {
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd16Val.u32[3]);
+                        FALLTHROUGH;
+                    }
+
+                    case TYP_SIMD12:
+                    {
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd12Val.u32[2]);
+                        FALLTHROUGH;
+                    }
+
+                    case TYP_SIMD8:
+                    case TYP_DOUBLE:
+                    case TYP_LONG:
+                    {
+                        // TODO-1stClassStructs: do not retype SIMD nodes
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[1]);
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[0]);
+                        break;
+                    }
+#endif // FEATURE_SIMD
+
+                    default:
+                    {
+                        unreached();
+                    }
+                }
+
+                add = genTreeHashAdd(ulo32(add), vecCon->GetSimdBaseType());
+                add = genTreeHashAdd(ulo32(add), vecCon->GetSimdSize());
+                break;
+            }
+
             case GT_JMP:
                 add = tree->AsVal()->gtVal1;
                 break;
@@ -4515,8 +4577,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
 #if defined(TARGET_XARCH)
                 /* We use fldz and fld1 to load 0.0 and 1.0, but all other  */
                 /* floating point constants are loaded using an indirection */
-                if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
-                    (*((__int64*)&(tree->AsDblCon()->gtDconVal)) == I64(0x3ff0000000000000)))
+                if (tree->IsFloatPositiveZero())
                 {
                     costEx = 1;
                     costSz = 1;
@@ -4540,8 +4601,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                     costSz = 2 + 8;
                 }
 #elif defined(TARGET_ARM64)
-                if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
-                    emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
+                if (tree->IsFloatPositiveZero() || emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
                 {
                     costEx = 1;
                     costSz = 1;
@@ -4561,6 +4621,14 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
             }
             break;
 
+            case GT_CNS_VEC:
+            {
+                costEx = IND_COST_EX;
+                costSz = 4;
+                level  = 0;
+                break;
+            }
+
             case GT_LCL_VAR:
                 level = 1;
                 if (gtIsLikelyRegVar(tree))
@@ -5844,6 +5912,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse)
         case GT_CNS_LNG:
         case GT_CNS_DBL:
         case GT_CNS_STR:
+        case GT_CNS_VEC:
         case GT_MEMORYBARRIER:
         case GT_JMP:
         case GT_JCC:
@@ -6870,6 +6939,50 @@ GenTree* Compiler::gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle)
     return node;
 }
 
+GenTreeVecCon* Compiler::gtNewVconNode(var_types type, CorInfoType simdBaseJitType)
+{
+    // Besides the element type, the node is handed the size genTypeSize() reports for 'type'.
+    return new (this, GT_CNS_VEC) GenTreeVecCon(type, simdBaseJitType, genTypeSize(type));
+}
+
+GenTree* Compiler::gtNewAllBitsSetConNode(var_types type)
+{
+    GenTree* allBitsSet;
+
+    switch (type)
+    {
+        case TYP_INT:
+            allBitsSet = gtNewIconNode(-1);
+            break;
+
+        case TYP_LONG:
+            allBitsSet = gtNewLconNode(-1);
+            break;
+
+        default:
+            noway_assert(!"Bad type in gtNewAllBitsSetConNode");
+            allBitsSet = nullptr;
+            break;
+    }
+
+    return allBitsSet;
+}
+
+GenTree* Compiler::gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType)
+{
+    assert(varTypeIsSIMD(type));
+    assert(simdBaseJitType != CORINFO_TYPE_UNDEF);
+
+    GenTreeVecCon* const allBitsSet = gtNewVconNode(type, simdBaseJitType);
+
+    // All four 64-bit lanes of gtSimd32Val are written, whatever 'type' is.
+    for (int lane = 0; lane < 4; lane++)
+    {
+        allBitsSet->gtSimd32Val.i64[lane] = -1;
+    }
+    return allBitsSet;
+}
+
 GenTree* Compiler::gtNewZeroConNode(var_types type)
 {
     GenTree* zero;
@@ -6908,6 +7021,16 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
     return zero;
 }
 
+GenTree* Compiler::gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType)
+{
+    assert(varTypeIsSIMD(type));
+    assert(simdBaseJitType != CORINFO_TYPE_UNDEF);
+
+    GenTreeVecCon* const zeroCon = gtNewVconNode(type, simdBaseJitType);
+    zeroCon->gtSimd32Val         = {}; // resets gtSimd32Val in full, whatever 'type' is
+    return zeroCon;
+}
+
 GenTree* Compiler::gtNewOneConNode(var_types type)
 {
     GenTree* one;
@@ -6946,23 +7069,6 @@ GenTreeLclVar* Compiler::gtNewStoreLclVar(unsigned dstLclNum, GenTree* src)
     return store;
 }
 
-#ifdef FEATURE_SIMD
-//---------------------------------------------------------------------
-// gtNewSIMDVectorZero: create a GT_SIMD node for Vector<T>.Zero
-//
-// Arguments:
-//    simdType        -  simd vector type
-//    simdBaseJitType -  element type of vector
-//    simdSize        -  size of vector in bytes
-GenTree* Compiler::gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize)
-{
-    var_types simdBaseType = genActualType(JitType2PreciseVarType(simdBaseJitType));
-    GenTree*  initVal      = gtNewZeroConNode(simdBaseType);
-    initVal->gtType        = simdBaseType;
-    return gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, simdSize);
-}
-#endif // FEATURE_SIMD
-
 GenTreeCall* Compiler::gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di)
 {
     return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, di);
@@ -7900,6 +8006,20 @@ GenTree* Compiler::gtClone(GenTree* tree, bool complexOK)
             copy = gtNewLconNode(tree->AsLngCon()->gtLconVal);
             break;
 
+        case GT_CNS_DBL:
+        {
+            copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet());
+            break;
+        }
+
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType());
+            vecCon->gtSimd32Val   = tree->AsVecCon()->gtSimd32Val;
+            copy                  = vecCon;
+            break;
+        }
+
         case GT_LCL_VAR:
             copy = gtNewLclvNode(tree->AsLclVarCommon()->GetLclNum(),
                                  tree->TypeGet() DEBUGARG(tree->AsLclVar()->gtLclILoffs));
@@ -8063,14 +8183,23 @@ GenTree* Compiler::gtCloneExpr(
                 goto DONE;
 
             case GT_CNS_DBL:
-                copy         = gtNewDconNode(tree->AsDblCon()->gtDconVal);
-                copy->gtType = tree->gtType; // keep the same type
+            {
+                copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet());
                 goto DONE;
+            }
 
             case GT_CNS_STR:
                 copy = gtNewSconNode(tree->AsStrCon()->gtSconCPX, tree->AsStrCon()->gtScpHnd);
                 goto DONE;
 
+            case GT_CNS_VEC:
+            {
+                GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType());
+                vecCon->gtSimd32Val   = tree->AsVecCon()->gtSimd32Val;
+                copy                  = vecCon;
+                goto DONE;
+            }
+
             case GT_LCL_VAR:
 
                 if (tree->AsLclVarCommon()->GetLclNum() == varNum)
@@ -9028,6 +9157,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_CNS_LNG:
         case GT_CNS_DBL:
         case GT_CNS_STR:
+        case GT_CNS_VEC:
         case GT_MEMORYBARRIER:
         case GT_JMP:
         case GT_JCC:
@@ -11002,9 +11132,60 @@ void Compiler::gtDispConst(GenTree* tree)
                 printf(" %#.17g", tree->AsDblCon()->gtDconVal);
             }
             break;
+
         case GT_CNS_STR:
             printf("<string constant>");
             break;
+
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            switch (vecCon->TypeGet())
+            {
+#if defined(FEATURE_SIMD)
+                case TYP_LONG:
+                case TYP_DOUBLE:
+                case TYP_SIMD8:
+                {
+                    // TODO-1stClassStructs: do not retype SIMD nodes
+                    simd8_t simdVal = vecCon->gtSimd8Val;
+                    printf("<0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1]);
+                    break;
+                }
+
+                case TYP_SIMD12:
+                {
+                    simd12_t simdVal = vecCon->gtSimd12Val;
+                    printf("<0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2]);
+                    break;
+                }
+
+                case TYP_SIMD16:
+                {
+                    simd16_t simdVal = vecCon->gtSimd16Val;
+                    printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2],
+                           simdVal.u32[3]);
+                    break;
+                }
+
+                case TYP_SIMD32:
+                {
+                    simd32_t simdVal = vecCon->gtSimd32Val;
+                    printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", simdVal.u64[0], simdVal.u64[1],
+                           simdVal.u64[2], simdVal.u64[3]);
+                    break;
+                }
+#endif // FEATURE_SIMD
+
+                default:
+                {
+                    unreached();
+                }
+            }
+            break;
+        }
+
         default:
             assert(!"unexpected constant node");
     }
@@ -15025,13 +15206,14 @@ GenTree* Compiler::gtNewTempAssign(
     if (dstTyp == TYP_UNDEF)
     {
         varDsc->lvType = dstTyp = genActualType(valTyp);
+    }
+
 #if FEATURE_SIMD
-        if (varTypeIsSIMD(dstTyp))
-        {
-            varDsc->lvSIMDType = 1;
-        }
-#endif
+    if (varTypeIsSIMD(dstTyp))
+    {
+        varDsc->lvSIMDType = 1;
     }
+#endif
 
 #ifdef DEBUG
     // Make sure the actual types match.
@@ -16665,6 +16847,221 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
 }
 #endif // TARGET_X86
 
+#if defined(FEATURE_HW_INTRINSICS)
+//----------------------------------------------------------------------------------------------
+// IsHWIntrinsicCreateConstant: Determines if a HWIntrinsic node represents a vector constant
+//
+//  Arguments:
+//     node      - The node to check; only the intrinsic ids listed in the switch below are recognized
+//     simd32Val - The vector constant being constructed; it is not cleared here, so pass it in zeroed
+//
+//  Returns:
+//     true if every operand of node is a constant; otherwise, false (simd32Val may be partially written)
+bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
+{
+    var_types simdType     = node->TypeGet(); // NOTE(review): not referenced again in this function
+    var_types simdBaseType = node->GetSimdBaseType();
+    unsigned  simdSize     = node->GetSimdSize();
+
+    size_t argCnt    = node->GetOperandCount();
+    size_t cnsArgCnt = 0; // operands recognized as constants; compared against argCnt at the end
+
+    switch (node->GetHWIntrinsicId())
+    {
+        case NI_Vector128_Create:
+#if defined(TARGET_XARCH)
+        case NI_Vector128_CreateScalarUnsafe:
+        case NI_Vector256_Create:
+        case NI_Vector256_CreateScalarUnsafe:
+#elif defined(TARGET_ARM64)
+        case NI_Vector64_Create:
+#endif
+        {
+            // With a single constant operand, that one value is written to every element.
+            if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
+            {
+                // Element 0 was written by the call above; repeat the operand for the remaining elements.
+                for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
+                {
+                    HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
+                }
+
+                cnsArgCnt = 1;
+            }
+            else // one operand per element, or a single operand that is not a constant
+            {
+                for (unsigned i = 1; i <= argCnt; i++)
+                {
+                    if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, simd32Val, simdBaseType))
+                    {
+                        cnsArgCnt++;
+                    }
+                }
+            }
+            // Either one operand (broadcast) or exactly one operand per element is expected.
+            assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+            return argCnt == cnsArgCnt;
+        }
+
+        default:
+        {
+            return false;
+        }
+    }
+}
+
+//----------------------------------------------------------------------------------------------
+// HandleArgForHWIntrinsicCreate: Processes an argument for the GenTreeVecCon::IsHWIntrinsicCreateConstant method
+//
+//  Arguments:
+//     arg       - The argument to process
+//     argIdx    - The index of the argument being processed; also the element of simd32Val that is written
+//     simd32Val - The vector constant being constructed; element argIdx is written only when arg is a constant
+//     baseType  - The base type of the vector constant; selects which element view of simd32Val is used
+//
+//  Returns:
+//     true if arg was a constant; otherwise, false
+//
+//  Notes:
+//     When arg is not a constant, element argIdx is asserted to still be zero, so simd32Val is
+//     expected to have been zeroed before the first call.
+//     On 32-bit targets a TYP_LONG/TYP_ULONG constant is recognized as a node for which OperIsLong()
+//     is true and whose two operands are both integer constants; the two halves are zero-extended
+//     before being combined so that the low half can never alter the high half.
+bool GenTreeVecCon::HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType)
+{
+    switch (baseType)
+    {
+        case TYP_BYTE:
+        case TYP_UBYTE:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i8[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_SHORT:
+        case TYP_USHORT:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i16[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i32[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+#if defined(TARGET_64BIT)
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+#else
+            if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
+            {
+                // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
+                // We need to reconstruct the 64-bit value in order to handle this
+                //
+                // gtOp2 holds the upper 32 bits and gtOp1 the lower 32 bits. Each half is truncated to
+                // an unsigned 32-bit value first: OR-ing in a low half that has bit 31 set would
+                // otherwise sign-extend it and overwrite the upper half with ones, and shifting a
+                // negative upper half as a signed value is avoided as well.
+
+                uint64_t gtLconVal = static_cast<uint32_t>(arg->AsOp()->gtOp2->AsIntCon()->gtIconVal);
+                gtLconVal <<= 32;
+                gtLconVal |= static_cast<uint32_t>(arg->AsOp()->gtOp1->AsIntCon()->gtIconVal);
+
+                simd32Val.i64[argIdx] = static_cast<int64_t>(gtLconVal);
+                return true;
+            }
+#endif // TARGET_64BIT
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i64[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_FLOAT:
+        {
+            if (arg->IsCnsFltOrDbl())
+            {
+                simd32Val.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                // We check against the i32, rather than f32, to account for -0.0
+                assert(simd32Val.i32[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_DOUBLE:
+        {
+            if (arg->IsCnsFltOrDbl())
+            {
+                simd32Val.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                // We check against the i64, rather than f64, to account for -0.0
+                assert(simd32Val.i64[argIdx] == 0);
+            }
+            break;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+
+    // arg was not a constant of the form expected for baseType; nothing was written.
+    return false;
+}
+#endif // FEATURE_HW_INTRINSICS
+
+//----------------------------------------------------------------------------------------------
+// GetSimdBaseType: Maps the SimdBaseJitType of this GenTreeVecCon node to a var_types value
+//
+//  Returns:
+//     TYP_UNKNOWN when the SimdBaseJitType is CORINFO_TYPE_UNDEF; otherwise the result of
+//     JitType2PreciseVarType for that SimdBaseJitType
+var_types GenTreeVecCon::GetSimdBaseType() const
+{
+    const CorInfoType jitType = GetSimdBaseJitType();
+
+    // CORINFO_TYPE_UNDEF is answered directly and never handed to JitType2PreciseVarType.
+    return (jitType == CORINFO_TYPE_UNDEF) ? TYP_UNKNOWN : JitType2PreciseVarType(jitType);
+}
+
 //------------------------------------------------------------------------
 // IsFieldAddr: Is "this" a static or class field address?
 //
@@ -16800,247 +17197,6 @@ bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_
     return fieldTyp != TYP_REF;
 }
 
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// gtGetSIMDZero: Get a zero value of the appropriate SIMD type.
-//
-// Arguments:
-//    var_types       - The simdType
-//    simdBaseJitType - The SIMD base JIT type we need
-//    simdHandle      - The handle for the SIMD type
-//
-// Return Value:
-//    A node generating the appropriate Zero, if we are able to discern it,
-//    otherwise null (note that this shouldn't happen, but callers should
-//    be tolerant of this case).
-
-GenTree* Compiler::gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle)
-{
-    bool found    = false;
-    bool isHWSIMD = true;
-    noway_assert(m_simdHandleCache != nullptr);
-
-    // First, determine whether this is Vector<T>.
-    if (simdType == getSIMDVectorType())
-    {
-        switch (simdBaseJitType)
-        {
-            case CORINFO_TYPE_FLOAT:
-                found = (simdHandle == m_simdHandleCache->SIMDFloatHandle);
-                break;
-            case CORINFO_TYPE_DOUBLE:
-                found = (simdHandle == m_simdHandleCache->SIMDDoubleHandle);
-                break;
-            case CORINFO_TYPE_INT:
-                found = (simdHandle == m_simdHandleCache->SIMDIntHandle);
-                break;
-            case CORINFO_TYPE_USHORT:
-                found = (simdHandle == m_simdHandleCache->SIMDUShortHandle);
-                break;
-            case CORINFO_TYPE_UBYTE:
-                found = (simdHandle == m_simdHandleCache->SIMDUByteHandle);
-                break;
-            case CORINFO_TYPE_SHORT:
-                found = (simdHandle == m_simdHandleCache->SIMDShortHandle);
-                break;
-            case CORINFO_TYPE_BYTE:
-                found = (simdHandle == m_simdHandleCache->SIMDByteHandle);
-                break;
-            case CORINFO_TYPE_LONG:
-                found = (simdHandle == m_simdHandleCache->SIMDLongHandle);
-                break;
-            case CORINFO_TYPE_UINT:
-                found = (simdHandle == m_simdHandleCache->SIMDUIntHandle);
-                break;
-            case CORINFO_TYPE_ULONG:
-                found = (simdHandle == m_simdHandleCache->SIMDULongHandle);
-                break;
-            case CORINFO_TYPE_NATIVEINT:
-                found = (simdHandle == m_simdHandleCache->SIMDNIntHandle);
-                break;
-            case CORINFO_TYPE_NATIVEUINT:
-                found = (simdHandle == m_simdHandleCache->SIMDNUIntHandle);
-                break;
-            default:
-                break;
-        }
-        if (found)
-        {
-            isHWSIMD = false;
-        }
-    }
-
-    if (!found)
-    {
-        // We must still have isHWSIMD set to true, and the only non-HW types left are the fixed types.
-        switch (simdType)
-        {
-            case TYP_SIMD8:
-                switch (simdBaseJitType)
-                {
-                    case CORINFO_TYPE_FLOAT:
-                        if (simdHandle == m_simdHandleCache->SIMDVector2Handle)
-                        {
-                            isHWSIMD = false;
-                        }
-#if defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
-                        else
-                        {
-                            assert(simdHandle == m_simdHandleCache->Vector64FloatHandle);
-                        }
-                        break;
-                    case CORINFO_TYPE_INT:
-                        assert(simdHandle == m_simdHandleCache->Vector64IntHandle);
-                        break;
-                    case CORINFO_TYPE_USHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector64UShortHandle);
-                        break;
-                    case CORINFO_TYPE_UBYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector64UByteHandle);
-                        break;
-                    case CORINFO_TYPE_SHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector64ShortHandle);
-                        break;
-                    case CORINFO_TYPE_BYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector64ByteHandle);
-                        break;
-                    case CORINFO_TYPE_UINT:
-                        assert(simdHandle == m_simdHandleCache->Vector64UIntHandle);
-#endif // defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
-                        break;
-                    default:
-                        break;
-                }
-                break;
-
-            case TYP_SIMD12:
-                assert((simdBaseJitType == CORINFO_TYPE_FLOAT) && (simdHandle == m_simdHandleCache->SIMDVector3Handle));
-                isHWSIMD = false;
-                break;
-
-            case TYP_SIMD16:
-                switch (simdBaseJitType)
-                {
-                    case CORINFO_TYPE_FLOAT:
-                        if (simdHandle == m_simdHandleCache->SIMDVector4Handle)
-                        {
-                            isHWSIMD = false;
-                        }
-#if defined(FEATURE_HW_INTRINSICS)
-                        else
-                        {
-                            assert(simdHandle == m_simdHandleCache->Vector128FloatHandle);
-                        }
-                        break;
-                    case CORINFO_TYPE_DOUBLE:
-                        assert(simdHandle == m_simdHandleCache->Vector128DoubleHandle);
-                        break;
-                    case CORINFO_TYPE_INT:
-                        assert(simdHandle == m_simdHandleCache->Vector128IntHandle);
-                        break;
-                    case CORINFO_TYPE_USHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector128UShortHandle);
-                        break;
-                    case CORINFO_TYPE_UBYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector128UByteHandle);
-                        break;
-                    case CORINFO_TYPE_SHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector128ShortHandle);
-                        break;
-                    case CORINFO_TYPE_BYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector128ByteHandle);
-                        break;
-                    case CORINFO_TYPE_LONG:
-                        assert(simdHandle == m_simdHandleCache->Vector128LongHandle);
-                        break;
-                    case CORINFO_TYPE_UINT:
-                        assert(simdHandle == m_simdHandleCache->Vector128UIntHandle);
-                        break;
-                    case CORINFO_TYPE_ULONG:
-                        assert(simdHandle == m_simdHandleCache->Vector128ULongHandle);
-                        break;
-                    case CORINFO_TYPE_NATIVEINT:
-                        assert(simdHandle == m_simdHandleCache->Vector128NIntHandle);
-                        break;
-                    case CORINFO_TYPE_NATIVEUINT:
-                        assert(simdHandle == m_simdHandleCache->Vector128NUIntHandle);
-                        break;
-#endif // defined(FEATURE_HW_INTRINSICS)
-
-                    default:
-                        break;
-                }
-                break;
-
-#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
-            case TYP_SIMD32:
-                switch (simdBaseJitType)
-                {
-                    case CORINFO_TYPE_FLOAT:
-                        assert(simdHandle == m_simdHandleCache->Vector256FloatHandle);
-                        break;
-                    case CORINFO_TYPE_DOUBLE:
-                        assert(simdHandle == m_simdHandleCache->Vector256DoubleHandle);
-                        break;
-                    case CORINFO_TYPE_INT:
-                        assert(simdHandle == m_simdHandleCache->Vector256IntHandle);
-                        break;
-                    case CORINFO_TYPE_USHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector256UShortHandle);
-                        break;
-                    case CORINFO_TYPE_UBYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector256UByteHandle);
-                        break;
-                    case CORINFO_TYPE_SHORT:
-                        assert(simdHandle == m_simdHandleCache->Vector256ShortHandle);
-                        break;
-                    case CORINFO_TYPE_BYTE:
-                        assert(simdHandle == m_simdHandleCache->Vector256ByteHandle);
-                        break;
-                    case CORINFO_TYPE_LONG:
-                        assert(simdHandle == m_simdHandleCache->Vector256LongHandle);
-                        break;
-                    case CORINFO_TYPE_UINT:
-                        assert(simdHandle == m_simdHandleCache->Vector256UIntHandle);
-                        break;
-                    case CORINFO_TYPE_ULONG:
-                        assert(simdHandle == m_simdHandleCache->Vector256ULongHandle);
-                        break;
-                    case CORINFO_TYPE_NATIVEINT:
-                        assert(simdHandle == m_simdHandleCache->Vector256NIntHandle);
-                        break;
-                    case CORINFO_TYPE_NATIVEUINT:
-                        assert(simdHandle == m_simdHandleCache->Vector256NUIntHandle);
-                        break;
-                    default:
-                        break;
-                }
-                break;
-#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
-            default:
-                break;
-        }
-    }
-
-    unsigned size = genTypeSize(simdType);
-    if (isHWSIMD)
-    {
-#if defined(FEATURE_HW_INTRINSICS)
-        return gtNewSimdZeroNode(simdType, simdBaseJitType, size, /* isSimdAsHWIntrinsic */ false);
-#else
-        JITDUMP("Coudn't find the matching SIMD type for %s<%s> in gtGetSIMDZero\n", varTypeName(simdType),
-                varTypeName(JitType2PreciseVarType(simdBaseJitType)));
-
-        return nullptr;
-#endif // FEATURE_HW_INTRINSICS
-    }
-    else
-    {
-        return gtNewSIMDVectorZero(simdType, simdBaseJitType, size);
-    }
-}
-#endif // FEATURE_SIMD
-
 CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
 {
     CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
@@ -17158,7 +17314,20 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
                 }
                 break;
 #endif
+            case GT_CNS_VEC:
+            {
+#if defined(FEATURE_HW_INTRINSICS)
+                structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType());
+#endif // FEATURE_HW_INTRINSICS
+
+#if defined(FEATURE_SIMD)
+                if (structHnd == NO_CLASS_HANDLE)
+                {
+                    structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType());
+                }
+#endif // FEATURE_SIMD
                 break;
+            }
         }
         // TODO-1stClassStructs: add a check that `structHnd != NO_CLASS_HANDLE`,
         // nowadays it won't work because the right part of an ASG could have struct type without a handle
@@ -18362,20 +18531,6 @@ bool GenTree::isContainableHWIntrinsic() const
             return false;
         }
     }
-#elif TARGET_ARM64
-    switch (AsHWIntrinsic()->GetHWIntrinsicId())
-    {
-        case NI_Vector64_get_Zero:
-        case NI_Vector128_get_Zero:
-        {
-            return true;
-        }
-
-        default:
-        {
-            return false;
-        }
-    }
 #else
     return false;
 #endif // TARGET_XARCH
@@ -18575,7 +18730,7 @@ GenTree* Compiler::gtNewSimdAbsNode(
     else
     {
         GenTree*             tmp;
-        CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+        CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
         GenTree* op1Dup1;
         op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -18586,11 +18741,11 @@ GenTree* Compiler::gtNewSimdAbsNode(
                                nullptr DEBUGARG("Clone op1 for vector abs"));
 
         // op1 = op1 < Zero
-        tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+        tmp = gtNewZeroConNode(type, simdBaseJitType);
         op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 
         // tmp = Zero - op1Dup1
-        tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+        tmp = gtNewZeroConNode(type, simdBaseJitType);
         tmp = gtNewSimdBinOpNode(GT_SUB, type, tmp, op1Dup1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 
         // result = ConditionalSelect(op1, tmp, op1Dup2)
@@ -18646,7 +18801,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps  op,
     }
 
     NamedIntrinsic       intrinsic = NI_Illegal;
-    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(type, simdBaseJitType);
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
     switch (op)
     {
@@ -19377,7 +19532,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps  op,
     assert(varTypeIsArithmetic(simdBaseType));
 
     NamedIntrinsic       intrinsic = NI_Illegal;
-    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(type, simdBaseJitType);
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
     switch (op)
     {
@@ -20025,8 +20180,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps  op,
             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
 
-            NamedIntrinsic getAllBitsSet = NI_Illegal;
-
             if (simdSize == 32)
             {
                 // TODO-XArch-CQ: It's a non-trivial amount of work to support these
@@ -20034,14 +20187,11 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps  op,
                 // other things, inverting the comparison and potentially support for a
                 // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient.
                 assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
-
-                intrinsic     = NI_Vector256_op_Equality;
-                getAllBitsSet = NI_Vector256_get_AllBitsSet;
+                intrinsic = NI_Vector256_op_Equality;
             }
             else
             {
-                intrinsic     = NI_Vector128_op_Equality;
-                getAllBitsSet = NI_Vector128_get_AllBitsSet;
+                intrinsic = NI_Vector128_op_Equality;
             }
 
             op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize,
@@ -20058,7 +20208,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps  op,
                 simdBaseJitType = CORINFO_TYPE_LONG;
             }
 
-            op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize);
+            op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType);
             break;
         }
 #elif defined(TARGET_ARM64)
@@ -20076,17 +20226,13 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps  op,
             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
 
-            NamedIntrinsic getAllBitsSet = NI_Illegal;
-
             if (simdSize == 8)
             {
-                intrinsic     = NI_Vector64_op_Equality;
-                getAllBitsSet = NI_Vector64_get_AllBitsSet;
+                intrinsic = NI_Vector64_op_Equality;
             }
             else
             {
-                intrinsic     = NI_Vector128_op_Equality;
-                getAllBitsSet = NI_Vector128_get_AllBitsSet;
+                intrinsic = NI_Vector128_op_Equality;
             }
 
             op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize,
@@ -20103,7 +20249,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps  op,
                 simdBaseJitType = CORINFO_TYPE_LONG;
             }
 
-            op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize);
+            op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType);
             break;
         }
 #else
@@ -20186,7 +20332,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps  op,
                 simdBaseJitType = CORINFO_TYPE_LONG;
             }
 
-            op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            op2 = gtNewZeroConNode(simdType, simdBaseJitType);
             break;
         }
 
@@ -20231,7 +20377,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps  op,
                 simdBaseJitType = CORINFO_TYPE_LONG;
             }
 
-            op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            op2 = gtNewZeroConNode(simdType, simdBaseJitType);
             break;
         }
 
@@ -20506,7 +20652,7 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types   type,
     assert(varTypeIsArithmetic(simdBaseType));
 
     NamedIntrinsic       intrinsic = NI_Illegal;
-    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(type, simdBaseJitType);
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
 #if defined(TARGET_XARCH)
     if (simdSize == 32)
@@ -20690,7 +20836,7 @@ GenTree* Compiler::gtNewSimdMinNode(var_types   type,
     assert(varTypeIsArithmetic(simdBaseType));
 
     NamedIntrinsic       intrinsic = NI_Illegal;
-    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(type, simdBaseJitType);
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
 #if defined(TARGET_XARCH)
     if (simdSize == 32)
@@ -20895,7 +21041,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // code formatting, its too long to reasonably display here.
 
                 CorInfoType opBaseJitType   = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT;
-                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector256_Create, opBaseJitType,
                                                 simdSize, isSimdAsHWIntrinsic);
@@ -20936,7 +21082,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // return Avx2.Permute4x64(tmp4.AsUInt64(), SHUFFLE_WYZX).As<T>();
 
                 CorInfoType          opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
-                CORINFO_CLASS_HANDLE clsHnd        = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x0000FFFF), NI_Vector256_Create, opBaseJitType,
                                                 simdSize, isSimdAsHWIntrinsic);
@@ -20976,7 +21122,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).AsUInt32();
 
                 CorInfoType          opBaseJitType = (simdBaseType == TYP_INT) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
-                CORINFO_CLASS_HANDLE clsHnd        = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 GenTree* op1Dup;
                 op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -21047,7 +21193,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As<T>();
 
                 CorInfoType opBaseJitType   = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT;
-                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector128_Create, opBaseJitType,
                                                 simdSize, isSimdAsHWIntrinsic);
@@ -21074,7 +21220,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // ...
 
                 CorInfoType          opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
-                CORINFO_CLASS_HANDLE clsHnd        = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
                 {
@@ -21133,7 +21279,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                     tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize,
                                                     isSimdAsHWIntrinsic);
 
-                    clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+                    clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
                     GenTree* tmp1Dup;
                     tmp1 = impCloneExpr(tmp1, &tmp1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -21168,7 +21314,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
                 // return Sse2.UnpackLow(tmp1, tmp2).As<T>();
 
                 CorInfoType          opBaseJitType = (simdBaseType == TYP_INT) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
-                CORINFO_CLASS_HANDLE clsHnd        = gtGetStructHandleForSIMD(type, opBaseJitType);
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
 
                 GenTree* op1Dup;
                 op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -21305,7 +21451,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
     {
         // AllBitsSet represents indices that are always "out of range" which means zero should be
         // selected for every element. We can special-case this down to just returning a zero node
-        return gtNewSimdZeroNode(type, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+        return gtNewZeroConNode(type, simdBaseJitType);
     }
 
     if (op2->IsVectorZero())
@@ -21321,16 +21467,16 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
     size_t elementCount = simdSize / elementSize;
 
 #if defined(TARGET_XARCH)
-    uint8_t  control    = 0;
-    bool     crossLane  = false;
-    bool     needsZero  = varTypeIsSmallInt(simdBaseType);
-    uint64_t value      = 0;
-    uint8_t  vecCns[32] = {};
-    uint8_t  mskCns[32] = {};
+    uint8_t  control   = 0;
+    bool     crossLane = false;
+    bool     needsZero = varTypeIsSmallInt(simdBaseType);
+    uint64_t value     = 0;
+    simd32_t vecCns    = {};
+    simd32_t mskCns    = {};
 
     for (size_t index = 0; index < elementCount; index++)
     {
-        value = op2->GetIntegralVectorConstElement(index);
+        value = op2->GetIntegralVectorConstElement(index, simdBaseType);
 
         if (value < elementCount)
         {
@@ -21363,12 +21509,12 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
 
             for (uint32_t i = 0; i < elementSize; i++)
             {
-                vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
+                vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
 
                 // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet
                 // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
 
-                mskCns[(index * elementSize) + i] = 0xFF;
+                mskCns.u8[(index * elementSize) + i] = 0xFF;
             }
         }
         else
@@ -21382,12 +21528,12 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
 
             for (uint32_t i = 0; i < elementSize; i++)
             {
-                vecCns[(index * elementSize) + i] = 0xFF;
+                vecCns.u8[(index * elementSize) + i] = 0xFF;
 
                 // When Ssse3 is not supported, we need to adjust the constant to be Zero
                 // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
 
-                mskCns[(index * elementSize) + i] = 0x00;
+                mskCns.u8[(index * elementSize) + i] = 0x00;
             }
         }
     }
@@ -21424,33 +21570,19 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
             GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
                                                          isSimdAsHWIntrinsic);
 
-            IntrinsicNodeBuilder nodeBuilder1(getAllocator(CMK_ASTNode), 16);
-
-            for (uint32_t i = 0; i < 16; i++)
-            {
-                nodeBuilder1.AddOperand(i, gtNewIconNode(vecCns[i]));
-            }
+            op2                          = gtNewVconNode(TYP_SIMD16, simdBaseJitType);
+            op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];
 
-            op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder1), NI_Vector128_Create, simdBaseJitType, 16,
-                                           isSimdAsHWIntrinsic);
-
-            op1Lower = gtNewSimdHWIntrinsicNode(type, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
+            op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
                                                 isSimdAsHWIntrinsic);
 
             GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
                                                          simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 
-            IntrinsicNodeBuilder nodeBuilder2(getAllocator(CMK_ASTNode), 16);
-
-            for (uint32_t i = 0; i < 16; i++)
-            {
-                nodeBuilder2.AddOperand(i, gtNewIconNode(vecCns[16 + i]));
-            }
-
-            op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder2), NI_Vector128_Create, simdBaseJitType, 16,
-                                           isSimdAsHWIntrinsic);
+            op2                          = gtNewVconNode(TYP_SIMD16, simdBaseJitType);
+            op2->AsVecCon()->gtSimd16Val = vecCns.v128[1];
 
-            op1Upper = gtNewSimdHWIntrinsicNode(type, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
+            op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
                                                 isSimdAsHWIntrinsic);
 
             return gtNewSimdHWIntrinsicNode(type, op1Lower, op1Upper, gtNewIconNode(1), NI_AVX_InsertVector128,
@@ -21459,18 +21591,13 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
 
         if (elementSize == 4)
         {
-            IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), elementCount);
-
             for (uint32_t i = 0; i < elementCount; i++)
             {
-                uint8_t value = (uint8_t)(vecCns[i * elementSize] / elementSize);
-                nodeBuilder.AddOperand(i, gtNewIconNode(value));
+                vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize);
             }
 
-            CorInfoType indicesJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
-
-            op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector256_Create, indicesJitType, simdSize,
-                                           isSimdAsHWIntrinsic);
+            op2                          = gtNewVconNode(type, simdBaseJitType);
+            op2->AsVecCon()->gtSimd32Val = vecCns;
 
             // swap the operands to match the encoding requirements
             retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize,
@@ -21491,15 +21618,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
         {
             simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
 
-            IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
-            for (uint32_t i = 0; i < simdSize; i++)
-            {
-                nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i]));
-            }
-
-            op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize,
-                                           isSimdAsHWIntrinsic);
+            op2                          = gtNewVconNode(type, simdBaseJitType);
+            op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];
 
             return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize,
                                             isSimdAsHWIntrinsic);
@@ -21544,69 +21664,53 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types   type,
     {
         assert(!compIsaSupportedDebugOnly(InstructionSet_SSSE3));
 
-        IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
-        for (uint32_t i = 0; i < simdSize; i++)
-        {
-            nodeBuilder.AddOperand(i, gtNewIconNode(mskCns[i]));
-        }
+        op2                          = gtNewVconNode(type, simdBaseJitType);
+        op2->AsVecCon()->gtSimd16Val = mskCns.v128[0];
 
-        op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize,
-                                       isSimdAsHWIntrinsic);
-
-        GenTree* zero = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+        GenTree* zero = gtNewZeroConNode(type, simdBaseJitType);
         retNode       = gtNewSimdCndSelNode(type, op2, retNode, zero, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
     }
 
     return retNode;
 #elif defined(TARGET_ARM64)
-    uint64_t value      = 0;
-    uint8_t  vecCns[16] = {};
+    uint64_t value  = 0;
+    simd16_t vecCns = {};
 
     for (size_t index = 0; index < elementCount; index++)
     {
-        value = op2->GetIntegralVectorConstElement(index);
+        value = op2->GetIntegralVectorConstElement(index, simdBaseType);
 
         if (value < elementCount)
         {
             for (uint32_t i = 0; i < elementSize; i++)
             {
-                vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
+                vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
             }
         }
         else
         {
             for (uint32_t i = 0; i < elementSize; i++)
             {
-                vecCns[(index * elementSize) + i] = 0xFF;
+                vecCns.u8[(index * elementSize) + i] = 0xFF;
             }
         }
     }
 
-    NamedIntrinsic createIntrinsic = NI_Vector64_Create;
     NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup;
 
     if (simdSize == 16)
     {
-        createIntrinsic = NI_Vector128_Create;
         lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup;
 
         op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize,
                                        isSimdAsHWIntrinsic);
     }
 
-    IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
-    for (uint32_t i = 0; i < simdSize; i++)
-    {
-        nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i]));
-    }
-
     // VectorTableLookup is only valid on byte/sbyte
     simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
 
-    op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), createIntrinsic, simdBaseJitType, simdSize,
-                                   isSimdAsHWIntrinsic);
+    op2                          = gtNewVconNode(type, simdBaseJitType);
+    op2->AsVecCon()->gtSimd16Val = vecCns;
 
     return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 #else
@@ -21677,7 +21781,7 @@ GenTree* Compiler::gtNewSimdSumNode(
 
     NamedIntrinsic       intrinsic = NI_Illegal;
     GenTree*             tmp       = nullptr;
-    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(simdType, simdBaseJitType);
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSimdOrHW(simdType, simdBaseJitType, isSimdAsHWIntrinsic);
 
 #if defined(TARGET_XARCH)
     assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType));
@@ -21840,7 +21944,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps  op,
                 assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
                 assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
             }
-            op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+            op2 = gtNewZeroConNode(type, simdBaseJitType);
 
             // Zero - op1
             return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
@@ -21849,11 +21953,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps  op,
         case GT_NOT:
         {
             assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));
-
-            intrinsic = (simdSize == 32) ? NI_Vector256_get_AllBitsSet : NI_Vector128_get_AllBitsSet;
-            op2       = gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
-
-            // op1 ^ AllBitsSet
+            op2 = gtNewAllBitsSetConNode(type, simdBaseJitType);
             return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
         }
 #elif defined(TARGET_ARM64)
@@ -21879,7 +21979,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps  op,
             else
             {
                 // Zero - op1
-                op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+                op2 = gtNewZeroConNode(type, simdBaseJitType);
                 return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
             }
         }
@@ -22006,11 +22106,11 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(
     }
     else
     {
-        tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+        tmp1 = gtNewZeroConNode(type, simdBaseJitType);
 
         if (varTypeIsSigned(simdBaseType))
         {
-            CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+            CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
             GenTree* op1Dup;
             op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -22133,7 +22233,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
     else if (varTypeIsFloating(simdBaseType))
     {
         assert(simdBaseType == TYP_FLOAT);
-        CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+        CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
         GenTree* op1Dup;
         op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -22183,11 +22283,11 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
     }
     else
     {
-        tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+        tmp1 = gtNewZeroConNode(type, simdBaseJitType);
 
         if (varTypeIsSigned(simdBaseType))
         {
-            CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+            CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
 
             GenTree* op1Dup;
             op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
@@ -22246,7 +22346,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
         assert(intrinsic != NI_Illegal);
 
         tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
-        zero = gtNewSimdZeroNode(TYP_SIMD16, simdBaseJitType, 16, isSimdAsHWIntrinsic);
+        zero = gtNewZeroConNode(TYP_SIMD16, simdBaseJitType);
         tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
                                         simdBaseJitType, 16, isSimdAsHWIntrinsic);
         return gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
@@ -22341,32 +22441,6 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types   type,
     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 }
 
-GenTree* Compiler::gtNewSimdZeroNode(var_types   type,
-                                     CorInfoType simdBaseJitType,
-                                     unsigned    simdSize,
-                                     bool        isSimdAsHWIntrinsic)
-{
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
-    assert(varTypeIsSIMD(type));
-    assert(getSIMDTypeForSize(simdSize) == type);
-
-    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
-    assert(varTypeIsArithmetic(simdBaseType));
-
-    NamedIntrinsic intrinsic = NI_Illegal;
-
-#if defined(TARGET_XARCH)
-    intrinsic = (simdSize == 32) ? NI_Vector256_get_Zero : NI_Vector128_get_Zero;
-#elif defined(TARGET_ARM64)
-    intrinsic     = (simdSize > 8) ? NI_Vector128_get_Zero : NI_Vector64_get_Zero;
-#else
-#error Unsupported platform
-#endif // !TARGET_XARCH && !TARGET_ARM64
-
-    return gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
-}
-
 GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID)
 {
     return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID,
index a813c2b..34ef15e 100644 (file)
@@ -534,7 +534,7 @@ enum GenTreeFlags : unsigned int
     GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | GTF_IND_UNALIGNED | GTF_IND_INVARIANT |
                     GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP
 #if defined(TARGET_XARCH)
-                     | GTF_IND_DONT_EXTEND 
+                     | GTF_IND_DONT_EXTEND
 #endif // TARGET_XARCH
                     ,
 
@@ -931,7 +931,12 @@ public:
 
     bool isContainedFltOrDblImmed() const
     {
-        return isContained() && (OperGet() == GT_CNS_DBL);
+        return isContained() && OperIs(GT_CNS_DBL);
+    }
+
+    bool isContainedVecImmed() const
+    {
+        return isContained() && OperIs(GT_CNS_VEC);
     }
 
     bool isLclField() const
@@ -950,7 +955,7 @@ public:
 
     bool isUsedFromMemory() const
     {
-        return ((isContained() && (isMemoryOp() || (OperGet() == GT_LCL_VAR) || (OperGet() == GT_CNS_DBL))) ||
+        return ((isContained() && (isMemoryOp() || OperIs(GT_LCL_VAR, GT_CNS_DBL, GT_CNS_VEC))) ||
                 isUsedFromSpillTemp());
     }
 
@@ -1097,8 +1102,8 @@ public:
         if (gtType == TYP_VOID)
         {
             // These are the only operators which can produce either VOID or non-VOID results.
-            assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSIMD() ||
-                   OperIsHWIntrinsic());
+            assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSimdOrHWintrinsic() ||
+                   IsCnsVec());
             return false;
         }
 
@@ -1155,8 +1160,8 @@ public:
 
     static bool OperIsConst(genTreeOps gtOper)
     {
-        static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR));
-        return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_STR);
+        static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR, GT_CNS_VEC));
+        return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_VEC);
     }
 
     bool OperIsConst() const
@@ -1726,16 +1731,13 @@ public:
     bool IsValidCallArgument();
 #endif // DEBUG
 
-    inline bool IsFPZero() const;
     inline bool IsIntegralConst(ssize_t constVal) const;
-    inline bool IsIntegralConstVector(ssize_t constVal) const;
-    inline bool IsSIMDZero() const;
     inline bool IsFloatPositiveZero() const;
     inline bool IsVectorZero() const;
     inline bool IsVectorAllBitsSet() const;
     inline bool IsVectorConst();
 
-    inline uint64_t GetIntegralVectorConstElement(size_t index);
+    inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);
 
     inline bool IsBoxedValue();
 
@@ -2092,6 +2094,8 @@ public:
 
     inline bool IsCnsNonZeroFltOrDbl() const;
 
+    inline bool IsCnsVec() const;
+
     bool IsIconHandle() const
     {
         return (gtOper == GT_CNS_INT) && ((gtFlags & GTF_ICON_HDL_MASK) != 0);
@@ -3250,6 +3254,197 @@ struct GenTreeStrCon : public GenTree
 #endif
 };
 
+// GenTreeVecCon -- vector constant (GT_CNS_VEC)
+//
+struct GenTreeVecCon : public GenTree
+{
+    union {
+        simd8_t  gtSimd8Val;
+        simd12_t gtSimd12Val;
+        simd16_t gtSimd16Val;
+        simd32_t gtSimd32Val;
+    };
+
+private:
+    // TODO-1stClassStructs: Tracking the size and base type should be unnecessary since the
+    // size should be `gtType` and the handle should be looked up at callsites where required
+
+    unsigned char gtSimdBaseJitType; // SIMD vector base JIT type
+    unsigned char gtSimdSize;        // SIMD vector size in bytes
+
+public:
+    CorInfoType GetSimdBaseJitType() const
+    {
+        return (CorInfoType)gtSimdBaseJitType;
+    }
+
+    void SetSimdBaseJitType(CorInfoType simdBaseJitType)
+    {
+        gtSimdBaseJitType = (unsigned char)simdBaseJitType;
+        assert(gtSimdBaseJitType == simdBaseJitType);
+    }
+
+    var_types GetSimdBaseType() const;
+
+    unsigned char GetSimdSize() const
+    {
+        return gtSimdSize;
+    }
+
+    void SetSimdSize(unsigned simdSize)
+    {
+        gtSimdSize = (unsigned char)simdSize;
+        assert(gtSimdSize == simdSize);
+    }
+
+#if defined(FEATURE_HW_INTRINSICS)
+    static bool IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val);
+
+    static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType);
+#endif // FEATURE_HW_INTRINSICS
+
+    bool IsAllBitsSet() const
+    {
+        switch (gtType)
+        {
+#if defined(FEATURE_SIMD)
+            case TYP_LONG:
+            case TYP_DOUBLE:
+            case TYP_SIMD8:
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                return (gtSimd8Val.u64[0] == 0xFFFFFFFFFFFFFFFF);
+            }
+
+            case TYP_SIMD12:
+            {
+                return (gtSimd12Val.u32[0] == 0xFFFFFFFF) && (gtSimd12Val.u32[1] == 0xFFFFFFFF) &&
+                       (gtSimd12Val.u32[2] == 0xFFFFFFFF);
+            }
+
+            case TYP_SIMD16:
+            {
+                return (gtSimd16Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd16Val.u64[1] == 0xFFFFFFFFFFFFFFFF);
+            }
+
+            case TYP_SIMD32:
+            {
+                return (gtSimd32Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[1] == 0xFFFFFFFFFFFFFFFF) &&
+                       (gtSimd32Val.u64[2] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[3] == 0xFFFFFFFFFFFFFFFF);
+            }
+#endif // FEATURE_SIMD
+
+            default:
+            {
+                unreached();
+            }
+        }
+    }
+
+    static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right)
+    {
+        var_types gtType = left->TypeGet();
+
+        if (gtType != right->TypeGet())
+        {
+            return false;
+        }
+
+        switch (gtType)
+        {
+#if defined(FEATURE_SIMD)
+            case TYP_LONG:
+            case TYP_DOUBLE:
+            case TYP_SIMD8:
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                return (left->gtSimd8Val.u64[0] == right->gtSimd8Val.u64[0]);
+            }
+
+            case TYP_SIMD12:
+            {
+                return (left->gtSimd12Val.u32[0] == right->gtSimd12Val.u32[0]) &&
+                       (left->gtSimd12Val.u32[1] == right->gtSimd12Val.u32[1]) &&
+                       (left->gtSimd12Val.u32[2] == right->gtSimd12Val.u32[2]);
+            }
+
+            case TYP_SIMD16:
+            {
+                return (left->gtSimd16Val.u64[0] == right->gtSimd16Val.u64[0]) &&
+                       (left->gtSimd16Val.u64[1] == right->gtSimd16Val.u64[1]);
+            }
+
+            case TYP_SIMD32:
+            {
+                return (left->gtSimd32Val.u64[0] == right->gtSimd32Val.u64[0]) &&
+                       (left->gtSimd32Val.u64[1] == right->gtSimd32Val.u64[1]) &&
+                       (left->gtSimd32Val.u64[2] == right->gtSimd32Val.u64[2]) &&
+                       (left->gtSimd32Val.u64[3] == right->gtSimd32Val.u64[3]);
+            }
+#endif // FEATURE_SIMD
+
+            default:
+            {
+                unreached();
+            }
+        }
+    }
+
+    bool IsZero() const
+    {
+        switch (gtType)
+        {
+#if defined(FEATURE_SIMD)
+            case TYP_LONG:
+            case TYP_DOUBLE:
+            case TYP_SIMD8:
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                return (gtSimd8Val.u64[0] == 0x0000000000000000);
+            }
+
+            case TYP_SIMD12:
+            {
+                return (gtSimd12Val.u32[0] == 0x00000000) && (gtSimd12Val.u32[1] == 0x00000000) &&
+                       (gtSimd12Val.u32[2] == 0x00000000);
+            }
+
+            case TYP_SIMD16:
+            {
+                return (gtSimd16Val.u64[0] == 0x0000000000000000) && (gtSimd16Val.u64[1] == 0x0000000000000000);
+            }
+
+            case TYP_SIMD32:
+            {
+                return (gtSimd32Val.u64[0] == 0x0000000000000000) && (gtSimd32Val.u64[1] == 0x0000000000000000) &&
+                       (gtSimd32Val.u64[2] == 0x0000000000000000) && (gtSimd32Val.u64[3] == 0x0000000000000000);
+            }
+#endif // FEATURE_SIMD
+
+            default:
+            {
+                unreached();
+            }
+        }
+    }
+
+    GenTreeVecCon(var_types type, CorInfoType simdBaseJitType, unsigned simdSize)
+        : GenTree(GT_CNS_VEC, type)
+        , gtSimdBaseJitType((unsigned char)simdBaseJitType)
+        , gtSimdSize((unsigned char)simdSize)
+    {
+        assert(varTypeIsSIMD(type));
+        assert(gtSimdBaseJitType == simdBaseJitType);
+        assert(gtSimdSize == simdSize);
+    }
+
+#if DEBUGGABLE_GENTREE
+    GenTreeVecCon() : GenTree()
+    {
+    }
+#endif
+};
+
 // Common supertype of LCL_VAR, LCL_FLD, REG_VAR, PHI_ARG
 // This inherits from UnOp because lclvar stores are Unops
 struct GenTreeLclVarCommon : public GenTreeUnOp
@@ -8065,7 +8260,7 @@ inline bool GenTree::OperIsInitBlkOp()
     {
         src = AsBlk()->Data()->gtSkipReloadOrCopy();
     }
-    return src->OperIsInitVal() || src->OperIsConst();
+    return src->OperIsInitVal() || src->IsIntegralConst();
 }
 
 inline bool GenTree::OperIsCopyBlkOp()
@@ -8074,21 +8269,6 @@ inline bool GenTree::OperIsCopyBlkOp()
 }
 
 //------------------------------------------------------------------------
-// IsFPZero: Checks whether this is a floating point constant with value 0.0
-//
-// Return Value:
-//    Returns true iff the tree is an GT_CNS_DBL, with value of 0.0.
-
-inline bool GenTree::IsFPZero() const
-{
-    if ((gtOper == GT_CNS_DBL) && (AsDblCon()->gtDconVal == 0.0))
-    {
-        return true;
-    }
-    return false;
-}
-
-//------------------------------------------------------------------------
 // IsIntegralConst: Checks whether this is a constant node with the given value
 //
 // Arguments:
@@ -8119,84 +8299,6 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const
 }
 
 //-------------------------------------------------------------------
-// IsIntegralConstVector: returns true if this is an SIMD vector
-// with all its elements equal to an integral constant.
-//
-// Arguments:
-//     constVal  -  const value of vector element
-//
-// Returns:
-//     True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const
-{
-#ifdef FEATURE_SIMD
-    // SIMDIntrinsicInit intrinsic with a const value as initializer
-    // represents a const vector.
-    if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit) &&
-        AsSIMD()->Op(1)->IsIntegralConst(constVal))
-    {
-        assert(varTypeIsIntegral(AsSIMD()->GetSimdBaseType()));
-        assert(AsSIMD()->GetOperandCount() == 1);
-        return true;
-    }
-#endif // FEATURE_SIMD
-
-#ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
-    {
-        const GenTreeHWIntrinsic* node = AsHWIntrinsic();
-
-        if (!varTypeIsIntegral(node->GetSimdBaseType()))
-        {
-            // Can't be an integral constant
-            return false;
-        }
-
-        NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
-        if ((node->GetOperandCount() == 0) && (constVal == 0))
-        {
-#if defined(TARGET_XARCH)
-            return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
-            return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-        }
-        else if ((node->GetOperandCount() == 1) && node->Op(1)->IsIntegralConst(constVal))
-        {
-#if defined(TARGET_XARCH)
-            return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
-#elif defined(TARGET_ARM64)
-            return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-        }
-    }
-#endif // FEATURE_HW_INTRINSICS
-
-    return false;
-}
-
-//-------------------------------------------------------------------
-// IsSIMDZero: returns true if this is an SIMD vector with all its
-// elements equal to zero.
-//
-// Returns:
-//     True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsSIMDZero() const
-{
-#ifdef FEATURE_SIMD
-    if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit))
-    {
-        return (AsSIMD()->Op(1)->IsIntegralConst(0) || AsSIMD()->Op(1)->IsFPZero());
-    }
-#endif
-
-    return false;
-}
-
-//-------------------------------------------------------------------
 // IsFloatPositiveZero: returns true if this is exactly a const float value of positive zero (+0.0)
 //
 // Returns:
@@ -8218,56 +8320,30 @@ inline bool GenTree::IsFloatPositiveZero() const
 }
 
 //-------------------------------------------------------------------
-// IsVectorZero: returns true if this node is a HWIntrinsic that is Vector*_get_Zero.
+// IsVectorZero: returns true if this node is a vector constant with all bits zero.
 //
 // Returns:
-//     True if this represents a HWIntrinsic node that is Vector*_get_Zero.
+//     True if this node is a vector constant with all bits zero
 //
-// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
-//       however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
-//       point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced.
-//       At some point, it makes sense to normalize this logic to be a single function call rather than have several
-//       separate ones; preferably this one.
 inline bool GenTree::IsVectorZero() const
 {
-#ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
-    {
-        const GenTreeHWIntrinsic* node        = AsHWIntrinsic();
-        const NamedIntrinsic      intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
-        return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
-        return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-    }
-#endif // FEATURE_HW_INTRINSICS
-
-    return false;
+    return IsCnsVec() && AsVecCon()->IsZero();
 }
 
 //-------------------------------------------------------------------
-// IsVectorAllBitsSet: returns true if this node is a HWIntrinsic that is Vector*_get_AllBitsSet.
+// IsVectorAllBitsSet: returns true if this node is a vector constant with all bits set.
 //
 // Returns:
-//     True if this represents a HWIntrinsic node that is Vector*_get_AllBitsSet.
+//     True if this node is a vector constant with all bits set
 //
 inline bool GenTree::IsVectorAllBitsSet() const
 {
-#ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
+#ifdef FEATURE_SIMD
+    if (OperIs(GT_CNS_VEC))
     {
-        const GenTreeHWIntrinsic* node        = AsHWIntrinsic();
-        const NamedIntrinsic      intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
-        return (intrinsicId == NI_Vector128_get_AllBitsSet) || (intrinsicId == NI_Vector256_get_AllBitsSet);
-#elif defined(TARGET_ARM64)
-        return (intrinsicId == NI_Vector64_get_AllBitsSet) || (intrinsicId == NI_Vector128_get_AllBitsSet);
-#endif // !TARGET_XARCH && !TARGET_ARM64
+        return AsVecCon()->IsAllBitsSet();
     }
-#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
 
     return false;
 }
@@ -8280,43 +8356,12 @@ inline bool GenTree::IsVectorAllBitsSet() const
 //
 inline bool GenTree::IsVectorConst()
 {
-#ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
+#ifdef FEATURE_SIMD
+    if (OperIs(GT_CNS_VEC))
     {
-        const GenTreeHWIntrinsic* node        = AsHWIntrinsic();
-        const NamedIntrinsic      intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
-        if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create))
-        {
-            for (GenTree* arg : Operands())
-            {
-                if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl())
-                {
-                    return false;
-                }
-            }
-
-            return true;
-        }
-#elif defined(TARGET_ARM64)
-        if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create))
-        {
-            for (GenTree* arg : Operands())
-            {
-                if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl())
-                {
-                    return false;
-                }
-            }
-
-            return true;
-        }
-#endif // !TARGET_XARCH && !TARGET_ARM64
-
-        return IsVectorZero() || IsVectorAllBitsSet();
+        return true;
     }
-#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
 
     return false;
 }
@@ -8327,67 +8372,60 @@ inline bool GenTree::IsVectorConst()
 // Returns:
 //     The value of a given element in an integral vector constant
 //
-inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index)
+inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types simdBaseType)
 {
 #ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
+    if (IsCnsVec())
     {
-        const GenTreeHWIntrinsic* node          = AsHWIntrinsic();
-        const NamedIntrinsic      intrinsicId   = node->GetHWIntrinsicId();
-        size_t                    operandsCount = node->GetOperandCount();
-
-        CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
-        var_types   simdBaseType    = node->GetSimdBaseType();
+        const GenTreeVecCon* node = AsVecCon();
 
-#if defined(TARGET_XARCH)
-        if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create))
-        {
-            return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue();
-        }
-#elif defined(TARGET_ARM64)
-        if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create))
+        switch (simdBaseType)
         {
-            return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue();
-        }
-#endif // !TARGET_XARCH && !TARGET_ARM64
+            case TYP_BYTE:
+            {
+                return node->gtSimd32Val.i8[index];
+            }
 
-        if (IsVectorZero())
-        {
-            return 0;
-        }
+            case TYP_UBYTE:
+            {
+                return node->gtSimd32Val.u8[index];
+            }
 
-        if (IsVectorAllBitsSet())
-        {
-            switch (simdBaseType)
+            case TYP_SHORT:
             {
-                case TYP_BYTE:
-                case TYP_UBYTE:
-                {
-                    return 0xFF;
-                }
+                return node->gtSimd32Val.i16[index];
+            }
 
-                case TYP_SHORT:
-                case TYP_USHORT:
-                {
-                    return 0xFFFF;
-                }
+            case TYP_USHORT:
+            {
+                return node->gtSimd32Val.u16[index];
+            }
 
-                case TYP_INT:
-                case TYP_UINT:
-                {
-                    return 0xFFFFFFFF;
-                }
+            case TYP_INT:
+            case TYP_FLOAT:
+            {
+                return node->gtSimd32Val.i32[index];
+            }
 
-                case TYP_LONG:
-                case TYP_ULONG:
-                {
-                    return 0xFFFFFFFFFFFFFFFF;
-                }
+            case TYP_UINT:
+            {
+                return node->gtSimd32Val.u32[index];
+            }
 
-                default:
-                {
-                    unreached();
-                }
+            case TYP_LONG:
+            case TYP_DOUBLE:
+            {
+                return node->gtSimd32Val.i64[index];
+            }
+
+            case TYP_ULONG:
+            {
+                return node->gtSimd32Val.u64[index];
+            }
+
+            default:
+            {
+                unreached();
             }
         }
     }
@@ -9009,12 +9047,12 @@ inline bool GenTree::IsIntCnsFitsInI32()
 
 inline bool GenTree::IsCnsFltOrDbl() const
 {
-    return OperGet() == GT_CNS_DBL;
+    return OperIs(GT_CNS_DBL);
 }
 
 inline bool GenTree::IsCnsNonZeroFltOrDbl() const
 {
-    if (OperGet() == GT_CNS_DBL)
+    if (IsCnsFltOrDbl())
     {
         double constValue = AsDblCon()->gtDconVal;
         return *(__int64*)&constValue != 0;
@@ -9023,6 +9061,11 @@ inline bool GenTree::IsCnsNonZeroFltOrDbl() const
     return false;
 }
 
+inline bool GenTree::IsCnsVec() const
+{
+    return OperIs(GT_CNS_VEC);
+}
+
 inline bool GenTree::IsHelperCall()
 {
     return OperGet() == GT_CALL && AsCall()->gtCallType == CT_HELPER;
index 0322afc..abc8fae 100644 (file)
@@ -46,6 +46,7 @@ GTNODE(CNS_INT          , GenTreeIntCon      ,0,GTK_LEAF)
 GTNODE(CNS_LNG          , GenTreeLngCon      ,0,GTK_LEAF)
 GTNODE(CNS_DBL          , GenTreeDblCon      ,0,GTK_LEAF)
 GTNODE(CNS_STR          , GenTreeStrCon      ,0,GTK_LEAF)
+GTNODE(CNS_VEC          , GenTreeVecCon      ,0,GTK_LEAF)
 
 //-----------------------------------------------------------------------------
 //  Unary  operators (1 operand):
index d5fa40b..1c4c554 100644 (file)
@@ -60,6 +60,7 @@ GTSTRUCT_1(IntCon      , GT_CNS_INT)
 GTSTRUCT_1(LngCon      , GT_CNS_LNG)
 GTSTRUCT_1(DblCon      , GT_CNS_DBL)
 GTSTRUCT_1(StrCon      , GT_CNS_STR)
+GTSTRUCT_1(VecCon      , GT_CNS_VEC)
 GTSTRUCT_N(LclVarCommon, GT_LCL_VAR, GT_LCL_FLD, GT_PHI_ARG, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)
 GTSTRUCT_3(LclVar      , GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_STORE_LCL_VAR)
 GTSTRUCT_3(LclFld      , GT_LCL_FLD, GT_STORE_LCL_FLD, GT_LCL_FLD_ADDR)
index 0d379f6..edb2786 100644 (file)
@@ -553,8 +553,190 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector64_Create:
         case NI_Vector128_Create:
         {
-            // We shouldn't handle this as an intrinsic if the
-            // respective ISAs have been disabled by the user.
+            uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+            assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
+
+            bool isConstant = true;
+
+            if (varTypeIsFloating(simdBaseType))
+            {
+                for (uint32_t index = 0; index < sig->numArgs; index++)
+                {
+                    GenTree* arg = impStackTop(index).val;
+
+                    if (!arg->IsCnsFltOrDbl())
+                    {
+                        isConstant = false;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                assert(varTypeIsIntegral(simdBaseType));
+
+                for (uint32_t index = 0; index < sig->numArgs; index++)
+                {
+                    GenTree* arg = impStackTop(index).val;
+
+                    if (!arg->IsIntegralConst())
+                    {
+                        isConstant = false;
+                        break;
+                    }
+                }
+            }
+
+            if (isConstant)
+            {
+                // Some of the below code assumes 8 or 16 byte SIMD types
+                assert((simdSize == 8) || (simdSize == 16));
+
+                // For create intrinsics that take 1 operand, we broadcast the value.
+                //
+                // This happens even for CreateScalarUnsafe since the upper bits are
+                // considered non-deterministic and we can therefore set them to anything.
+                //
+                // We do this as it simplifies the logic and allows certain code paths to
+                // have better codegen, such as for 0, AllBitsSet, or certain small constants
+
+                GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+                switch (simdBaseType)
+                {
+                    case TYP_BYTE:
+                    case TYP_UBYTE:
+                    {
+                        uint8_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < simdLength - 1; index++)
+                            {
+                                vecCon->gtSimd16Val.u8[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_SHORT:
+                    case TYP_USHORT:
+                    {
+                        uint16_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd16Val.u16[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_INT:
+                    case TYP_UINT:
+                    {
+                        uint32_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd16Val.u32[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_LONG:
+                    case TYP_ULONG:
+                    {
+                        uint64_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd16Val.u64[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_FLOAT:
+                    {
+                        float cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+                            vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd16Val.f32[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_DOUBLE:
+                    {
+                        double cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+                            vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd16Val.f64[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    default:
+                    {
+                        unreached();
+                    }
+                }
+
+                retNode = vecCon;
+                break;
+            }
 
             IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs);
 
@@ -791,7 +973,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                                                /* isSimdAsHWIntrinsic */ false);
                 op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);
 
-                GenTree* zero = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+                GenTree* zero  = gtNewZeroConNode(simdType, simdBaseJitType);
                 ssize_t  index = 8 / genTypeSize(simdBaseType);
 
                 op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
@@ -857,10 +1039,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector64_get_AllBitsSet:
         case NI_Vector128_get_AllBitsSet:
         {
-            assert(!sig->hasThis());
-            assert(numArgs == 0);
-
-            retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+            assert(sig->numArgs == 0);
+            retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType);
             break;
         }
 
@@ -868,7 +1048,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector128_get_Zero:
         {
             assert(sig->numArgs == 0);
-            retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            retNode = gtNewZeroConNode(retType, simdBaseJitType);
             break;
         }
 
@@ -892,7 +1072,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             //   AdvSimd.ExtractVector128(vector, Vector128<T>.Zero, 8 / sizeof(T)).GetLower();
             assert(numArgs == 1);
             op1            = impPopStack().val;
-            GenTree* zero  = gtNewSimdHWIntrinsicNode(retType, NI_Vector128_get_Zero, simdBaseJitType, simdSize);
+            GenTree* zero  = gtNewZeroConNode(retType, simdBaseJitType);
             ssize_t  index = 8 / genTypeSize(simdBaseType);
 
             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
@@ -1403,15 +1583,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 break;
             }
 
-            size_t elementSize  = genTypeSize(simdBaseType);
-            size_t elementCount = simdSize / elementSize;
-
-            if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize)
-            {
-                // TODO-ARM64-CQ: Handling reinterpreted vector constants is a bit more complex
-                break;
-            }
-
             if (sig->numArgs == 2)
             {
                 op2 = impSIMDPopStack(retType);
index 2bf4d7c..add3a61 100644 (file)
@@ -790,17 +790,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 break;
             }
 
-            // mvni doesn't support the range of element types, so hard code the 'opts' value.
-            case NI_Vector64_get_Zero:
-            case NI_Vector64_get_AllBitsSet:
-                GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_2S);
-                break;
-
-            case NI_Vector128_get_Zero:
-            case NI_Vector128_get_AllBitsSet:
-                GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
-                break;
-
             case NI_AdvSimd_DuplicateToVector64:
             case NI_AdvSimd_DuplicateToVector128:
             case NI_AdvSimd_Arm64_DuplicateToVector64:
index 42b20ce..c78c436 100644 (file)
@@ -1103,38 +1103,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
-        case NI_Vector128_get_Zero:
-        case NI_Vector256_get_Zero:
-        {
-            emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
-            break;
-        }
-
-        case NI_Vector128_get_AllBitsSet:
-            if (varTypeIsFloating(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX))
-            {
-                // The following corresponds to vcmptrueps pseudo-op and not available without VEX prefix.
-                emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, targetReg, targetReg, 15);
-            }
-            else
-            {
-                emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
-            }
-            break;
-
-        case NI_Vector256_get_AllBitsSet:
-            if (varTypeIsIntegral(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
-            {
-                emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
-            }
-            else
-            {
-                assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX));
-                // The following corresponds to vcmptrueps pseudo-op.
-                emit->emitIns_SIMD_R_R_R_I(INS_cmpps, attr, targetReg, targetReg, targetReg, 15);
-            }
-            break;
-
         default:
         {
             unreached();
index 6defcc2..0902554 100644 (file)
@@ -48,9 +48,9 @@ HARDWARE_INTRINSIC(Vector64,      EqualsAll,
 HARDWARE_INTRINSIC(Vector64,      EqualsAny,                                                         8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      ExtractMostSignificantBits,                                        8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Floor,                                                             8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector64,      get_AllBitsSet,                                                    8,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector64,      get_AllBitsSet,                                                    8,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector64,      get_Count,                                                         8,      0,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(Vector64,      get_Zero,                                                          8,      0,     {INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector64,      get_Zero,                                                          8,      0,     {INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},        HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector64,      GetElement,                                                        8,      2,     {INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},         HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector64,      GreaterThan,                                                       8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      GreaterThanAll,                                                    8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -147,9 +147,9 @@ HARDWARE_INTRINSIC(Vector128,     EqualsAll,
 HARDWARE_INTRINSIC(Vector128,     EqualsAny,                                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     ExtractMostSignificantBits,                                       16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Floor,                                                            16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,     get_AllBitsSet,                                                   16,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector128,     get_AllBitsSet,                                                   16,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector128,     get_Count,                                                        16,      0,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(Vector128,     get_Zero,                                                         16,      0,     {INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector128,     get_Zero,                                                         16,      0,     {INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},        HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector128,     GetElement,                                                       16,      2,     {INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},         HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128,     GetLower,                                                         16,      1,     {INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov},         HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,     GetUpper,                                                         16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport)
index 994570f..8b5f283 100644 (file)
@@ -65,9 +65,9 @@ HARDWARE_INTRINSIC(Vector128,       EqualsAll,
 HARDWARE_INTRINSIC(Vector128,       EqualsAny,                                  16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       ExtractMostSignificantBits,                 16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Floor,                                      16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Vector128,       get_Count,                                  16,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,       get_Zero,                                   16,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector128,       get_Zero,                                   16,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Vector128,       GetElement,                                 16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128,       GreaterThan,                                16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Vector128,       GreaterThanAll,                             16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -162,9 +162,9 @@ HARDWARE_INTRINSIC(Vector256,       EqualsAll,
 HARDWARE_INTRINSIC(Vector256,       EqualsAny,                                  32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       ExtractMostSignificantBits,                 32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Floor,                                      32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Vector256,       GetElement,                                 32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256,       GetLower,                                   32,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       GreaterThan,                                32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
index d48db66..7b2155f 100644 (file)
@@ -897,7 +897,194 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
 
         case NI_Vector128_Create:
         case NI_Vector256_Create:
+        case NI_Vector128_CreateScalarUnsafe:
+        case NI_Vector256_CreateScalarUnsafe:
         {
+            uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+            assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
+
+            bool isConstant = true;
+
+            if (varTypeIsFloating(simdBaseType))
+            {
+                for (uint32_t index = 0; index < sig->numArgs; index++)
+                {
+                    GenTree* arg = impStackTop(index).val;
+
+                    if (!arg->IsCnsFltOrDbl())
+                    {
+                        isConstant = false;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                assert(varTypeIsIntegral(simdBaseType));
+
+                for (uint32_t index = 0; index < sig->numArgs; index++)
+                {
+                    GenTree* arg = impStackTop(index).val;
+
+                    if (!arg->IsIntegralConst())
+                    {
+                        isConstant = false;
+                        break;
+                    }
+                }
+            }
+
+            if (isConstant)
+            {
+                // Some of the below code assumes 16 or 32 byte SIMD types
+                assert((simdSize == 16) || (simdSize == 32));
+
+                // For create intrinsics that take 1 operand, we broadcast the value.
+                //
+                // This happens even for CreateScalarUnsafe since the upper bits are
+                // considered non-deterministic and we can therefore set them to anything.
+                //
+                // We do this as it simplifies the logic and allows certain code paths to
+                // have better codegen, such as for 0, AllBitsSet, or certain small constants
+
+                GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+                switch (simdBaseType)
+                {
+                    case TYP_BYTE:
+                    case TYP_UBYTE:
+                    {
+                        uint8_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < simdLength - 1; index++)
+                            {
+                                vecCon->gtSimd32Val.u8[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_SHORT:
+                    case TYP_USHORT:
+                    {
+                        uint16_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd32Val.u16[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_INT:
+                    case TYP_UINT:
+                    {
+                        uint32_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd32Val.u32[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_LONG:
+                    case TYP_ULONG:
+                    {
+                        uint64_t cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                            vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd32Val.u64[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_FLOAT:
+                    {
+                        float cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+                            vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd32Val.f32[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    case TYP_DOUBLE:
+                    {
+                        double cnsVal = 0;
+
+                        for (uint32_t index = 0; index < sig->numArgs; index++)
+                        {
+                            cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+                            vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal;
+                        }
+
+                        if (sig->numArgs == 1)
+                        {
+                            for (uint32_t index = 0; index < (simdLength - 1); index++)
+                            {
+                                vecCon->gtSimd32Val.f64[index] = cnsVal;
+                            }
+                        }
+                        break;
+                    }
+
+                    default:
+                    {
+                        unreached();
+                    }
+                }
+
+                retNode = vecCon;
+                break;
+            }
+
 #if defined(TARGET_X86)
             if (varTypeIsLong(simdBaseType))
             {
@@ -919,26 +1106,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_Vector128_CreateScalarUnsafe:
-        case NI_Vector256_CreateScalarUnsafe:
-        {
-            assert(sig->numArgs == 1);
-
-#ifdef TARGET_X86
-            if (varTypeIsLong(simdBaseType))
-            {
-                // TODO-XARCH-CQ: It may be beneficial to emit the movq
-                // instruction, which takes a 64-bit memory address and
-                // works on 32-bit x86 systems.
-                break;
-            }
-#endif // TARGET_X86
-
-            op1     = impPopStack().val;
-            retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
-            break;
-        }
-
         case NI_Vector128_Divide:
         case NI_Vector256_Divide:
         case NI_Vector128_op_Division:
@@ -1217,7 +1384,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector256_get_AllBitsSet:
         {
             assert(sig->numArgs == 0);
-            retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+            retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType);
             break;
         }
 
@@ -1236,7 +1403,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector256_get_Zero:
         {
             assert(sig->numArgs == 0);
-            retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            retNode = gtNewZeroConNode(retType, simdBaseJitType);
             break;
         }
 
@@ -1895,6 +2062,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector256_Shuffle:
         {
             assert((sig->numArgs == 2) || (sig->numArgs == 3));
+            assert((simdSize == 16) || (simdSize == 32));
 
             GenTree* indices = impStackTop(0).val;
 
@@ -1907,12 +2075,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             size_t elementSize  = genTypeSize(simdBaseType);
             size_t elementCount = simdSize / elementSize;
 
-            if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize)
-            {
-                // TODO-XARCH-CQ: Handling reinterpreted vector constants is a bit more complex
-                break;
-            }
-
             if (simdSize == 32)
             {
                 if (!compExactlyDependsOn(InstructionSet_AVX2))
@@ -1927,7 +2089,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
 
                     for (size_t index = 0; index < elementCount; index++)
                     {
-                        uint64_t value = indices->GetIntegralVectorConstElement(index);
+                        uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType);
 
                         if (value >= elementCount)
                         {
index 12700f4..a9059ba 100644 (file)
@@ -334,6 +334,7 @@ void Compiler::impSaveStackState(SavedStack* savePtr, bool copy)
                     case GT_CNS_LNG:
                     case GT_CNS_DBL:
                     case GT_CNS_STR:
+                    case GT_CNS_VEC:
                     case GT_LCL_VAR:
                         table->val = gtCloneExpr(tree);
                         break;
@@ -1216,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
 #endif // FEATURE_HW_INTRINSICS
     {
         assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_OBJ, GT_CALL, GT_MKREFANY, GT_RET_EXPR,
-                           GT_COMMA) ||
+                           GT_COMMA, GT_CNS_VEC) ||
                ((src->TypeGet() != TYP_STRUCT) && src->OperIsSIMD()));
     }
 #endif // DEBUG
@@ -1588,7 +1589,7 @@ GenTree* Compiler::impGetStructAddr(GenTree*             structVal,
         return (structVal->AsObj()->Addr());
     }
     else if (oper == GT_CALL || oper == GT_RET_EXPR || oper == GT_OBJ || oper == GT_MKREFANY ||
-             structVal->OperIsSimdOrHWintrinsic())
+             structVal->OperIsSimdOrHWintrinsic() || structVal->IsCnsVec())
     {
         unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
 
@@ -1781,6 +1782,12 @@ GenTree* Compiler::impNormStructVal(GenTree*             structVal,
             alreadyNormalized = true;
             break;
 
+        case GT_CNS_VEC:
+        {
+            assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+            break;
+        }
+
 #ifdef FEATURE_SIMD
         case GT_SIMD:
             assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
@@ -1820,7 +1827,7 @@ GenTree* Compiler::impNormStructVal(GenTree*             structVal,
             }
 
 #ifdef FEATURE_SIMD
-            if (blockNode->OperIsSimdOrHWintrinsic())
+            if (blockNode->OperIsSimdOrHWintrinsic() || blockNode->IsCnsVec())
             {
                 parent->AsOp()->gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
                 alreadyNormalized     = true;
index 7d1b0ab..a3f2142 100644 (file)
@@ -161,7 +161,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
     int       simdSize;
     var_types simdType;
 
-    NamedIntrinsic niZero;
     NamedIntrinsic niEquals;
 
     GenTree* cnsVec1     = nullptr;
@@ -192,7 +191,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
         simdSize = 32;
         simdType = TYP_SIMD32;
 
-        niZero   = NI_Vector256_get_Zero;
         niEquals = NI_Vector256_op_Equality;
 
         // Special case: use a single vector for Length == 16
@@ -217,7 +215,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
         simdSize = 16;
         simdType = TYP_SIMD16;
 
-        niZero   = NI_Vector128_get_Zero;
         niEquals = NI_Vector128_op_Equality;
 
         // Special case: use a single vector for Length == 8
@@ -239,7 +236,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
         return nullptr;
     }
 
-    GenTree* zero = gtNewSimdHWIntrinsicNode(simdType, niZero, baseType, simdSize);
+    GenTree* zero = gtNewZeroConNode(simdType, baseType);
 
     GenTree* offset1  = gtNewIconNode(dataOffset, TYP_I_IMPL);
     GenTree* offset2  = gtNewIconNode(dataOffset + len * sizeof(USHORT) - simdSize, TYP_I_IMPL);
index 67ae437..4a3ab3e 100644 (file)
@@ -838,6 +838,41 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
                 assert(op->isContainedIntOrIImmed());
                 return OperandDesc(op->AsIntCon()->IconValue(), op->AsIntCon()->ImmedValNeedsReloc(compiler));
 
+            case GT_CNS_VEC:
+            {
+                switch (op->TypeGet())
+                {
+#if defined(FEATURE_SIMD)
+                    case TYP_LONG:
+                    case TYP_DOUBLE:
+                    case TYP_SIMD8:
+                    {
+                        // TODO-1stClassStructs: do not retype SIMD nodes
+                        simd8_t constValue = op->AsVecCon()->gtSimd8Val;
+                        return OperandDesc(emit->emitSimd8Const(constValue));
+                    }
+
+                    case TYP_SIMD12:
+                    case TYP_SIMD16:
+                    {
+                        simd16_t constValue = op->AsVecCon()->gtSimd16Val;
+                        return OperandDesc(emit->emitSimd16Const(constValue));
+                    }
+
+                    case TYP_SIMD32:
+                    {
+                        simd32_t constValue = op->AsVecCon()->gtSimd32Val;
+                        return OperandDesc(emit->emitSimd32Const(constValue));
+                    }
+#endif // FEATURE_SIMD
+
+                    default:
+                    {
+                        unreached();
+                    }
+                }
+            }
+
             default:
                 unreached();
         }
index 7f9c11a..c540c48 100644 (file)
@@ -2022,6 +2022,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
             case GT_CNS_LNG:
             case GT_CNS_DBL:
             case GT_CNS_STR:
+            case GT_CNS_VEC:
             case GT_CLS_VAR_ADDR:
             case GT_PHYSREG:
                 // These are all side-effect-free leaf nodes.
index ac9c938..edeee01 100644 (file)
@@ -318,8 +318,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
 
 #ifdef FEATURE_HW_INTRINSICS
         case GT_HWINTRINSIC:
-            LowerHWIntrinsic(node->AsHWIntrinsic());
-            break;
+            return LowerHWIntrinsic(node->AsHWIntrinsic());
 #endif // FEATURE_HW_INTRINSICS
 
         case GT_LCL_FLD:
@@ -3502,28 +3501,21 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
         else if (src->OperIs(GT_CNS_INT))
         {
             assert(src->IsIntegralConst(0) && "expected an INIT_VAL for non-zero init.");
+
 #ifdef FEATURE_SIMD
             if (varTypeIsSIMD(lclRegType))
             {
-                CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(lclStore);
-                if (simdBaseJitType == CORINFO_TYPE_UNDEF)
-                {
-                    // Lie about the type if we don't know/have it.
-                    simdBaseJitType = CORINFO_TYPE_FLOAT;
-                }
-                GenTreeSIMD* simdTree =
-                    comp->gtNewSIMDNode(lclRegType, src, SIMDIntrinsicInit, simdBaseJitType, varDsc->lvExactSize);
-                BlockRange().InsertAfter(src, simdTree);
-                LowerSIMD(simdTree);
-                src               = simdTree;
-                lclStore->gtOp1   = src;
-                convertToStoreObj = false;
+                GenTree* zeroCon = comp->gtNewZeroConNode(lclRegType, CORINFO_TYPE_FLOAT);
+
+                BlockRange().InsertAfter(src, zeroCon);
+                BlockRange().Remove(src);
+
+                src             = zeroCon;
+                lclStore->gtOp1 = src;
             }
-            else
 #endif // FEATURE_SIMD
-            {
-                convertToStoreObj = false;
-            }
+
+            convertToStoreObj = false;
         }
         else if (src->OperIs(GT_LCL_VAR))
         {
index c8e7219..7e6acf6 100644 (file)
@@ -337,17 +337,17 @@ private:
     void LowerSIMD(GenTreeSIMD* simdNode);
 #endif // FEATURE_SIMD
 #ifdef FEATURE_HW_INTRINSICS
-    void LowerHWIntrinsic(GenTreeHWIntrinsic* node);
+    GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
+    GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
     void LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
-    void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
-    void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
-    void LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
+    GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
+    GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
 #if defined(TARGET_XARCH)
     void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
-    void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
+    GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
     GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
     GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
     GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
@@ -358,176 +358,6 @@ private:
     GenTree* LowerModPow2(GenTree* node);
     GenTree* LowerAddForPossibleContainment(GenTreeOp* node);
 #endif // !TARGET_XARCH && !TARGET_ARM64
-
-    union VectorConstant {
-        int8_t   i8[32];
-        uint8_t  u8[32];
-        int16_t  i16[16];
-        uint16_t u16[16];
-        int32_t  i32[8];
-        uint32_t u32[8];
-        int64_t  i64[4];
-        uint64_t u64[4];
-        float    f32[8];
-        double   f64[4];
-    };
-
-    //----------------------------------------------------------------------------------------------
-    // VectorConstantIsBroadcastedI64: Check N i64 elements in a constant vector for equality
-    //
-    //  Arguments:
-    //     vecCns  - Constant vector
-    //     count   - Amount of i64 components to compare
-    //
-    //  Returns:
-    //     true if N i64 elements of the given vector are equal
-    static bool VectorConstantIsBroadcastedI64(VectorConstant& vecCns, int count)
-    {
-        assert(count >= 1 && count <= 4);
-        for (int i = 1; i < count; i++)
-        {
-            if (vecCns.i64[i] != vecCns.i64[0])
-            {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    //----------------------------------------------------------------------------------------------
-    // ProcessArgForHWIntrinsicCreate: Processes an argument for the Lowering::LowerHWIntrinsicCreate method
-    //
-    //  Arguments:
-    //     arg      - The argument to process
-    //     argIdx   - The index of the argument being processed
-    //     vecCns   - The vector constant being constructed
-    //     baseType - The base type of the vector constant
-    //
-    //  Returns:
-    //     true if arg was a constant; otherwise, false
-    static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, VectorConstant& vecCns, var_types baseType)
-    {
-        switch (baseType)
-        {
-            case TYP_BYTE:
-            case TYP_UBYTE:
-            {
-                if (arg->IsCnsIntOrI())
-                {
-                    vecCns.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
-                    return true;
-                }
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    assert(vecCns.i8[argIdx] == 0);
-                }
-                break;
-            }
-
-            case TYP_SHORT:
-            case TYP_USHORT:
-            {
-                if (arg->IsCnsIntOrI())
-                {
-                    vecCns.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
-                    return true;
-                }
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    assert(vecCns.i16[argIdx] == 0);
-                }
-                break;
-            }
-
-            case TYP_INT:
-            case TYP_UINT:
-            {
-                if (arg->IsCnsIntOrI())
-                {
-                    vecCns.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
-                    return true;
-                }
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    assert(vecCns.i32[argIdx] == 0);
-                }
-                break;
-            }
-
-            case TYP_LONG:
-            case TYP_ULONG:
-            {
-#if defined(TARGET_64BIT)
-                if (arg->IsCnsIntOrI())
-                {
-                    vecCns.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
-                    return true;
-                }
-#else
-                if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
-                {
-                    // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
-                    // We need to reconstruct the 64-bit value in order to handle this
-
-                    INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal;
-                    gtLconVal <<= 32;
-                    gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal;
-
-                    vecCns.i64[argIdx] = gtLconVal;
-                    return true;
-                }
-#endif // TARGET_64BIT
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    assert(vecCns.i64[argIdx] == 0);
-                }
-                break;
-            }
-
-            case TYP_FLOAT:
-            {
-                if (arg->IsCnsFltOrDbl())
-                {
-                    vecCns.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
-                    return true;
-                }
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    // We check against the i32, rather than f32, to account for -0.0
-                    assert(vecCns.i32[argIdx] == 0);
-                }
-                break;
-            }
-
-            case TYP_DOUBLE:
-            {
-                if (arg->IsCnsFltOrDbl())
-                {
-                    vecCns.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
-                    return true;
-                }
-                else
-                {
-                    // We expect the VectorConstant to have been already zeroed
-                    // We check against the i64, rather than f64, to account for -0.0
-                    assert(vecCns.i64[argIdx] == 0);
-                }
-                break;
-            }
-
-            default:
-            {
-                unreached();
-            }
-        }
-
-        return false;
-    }
 #endif // FEATURE_HW_INTRINSICS
 
     //----------------------------------------------------------------------------------------------
index 02d90fe..5ee0c27 100644 (file)
@@ -53,7 +53,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
     if (!varTypeIsFloating(parentNode->TypeGet()))
     {
 #ifdef TARGET_ARM64
-        if (parentNode->OperIsCompare() && childNode->IsFPZero())
+        if (parentNode->OperIsCompare() && childNode->IsFloatPositiveZero())
         {
             // Contain 0.0 constant in fcmp on arm64
             // TODO: Enable for arm too (vcmp)
@@ -917,7 +917,7 @@ void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 {
     assert(node->TypeGet() != TYP_SIMD32);
 
@@ -940,31 +940,25 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
             // that the node is modified to either not be a HWIntrinsic node or that it is no longer
             // the same intrinsic as when it came in.
 
-            LowerHWIntrinsicCreate(node);
-            assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId));
-            LowerNode(node);
-            return;
+            return LowerHWIntrinsicCreate(node);
         }
 
         case NI_Vector64_Dot:
         case NI_Vector128_Dot:
         {
-            LowerHWIntrinsicDot(node);
-            return;
+            return LowerHWIntrinsicDot(node);
         }
 
         case NI_Vector64_op_Equality:
         case NI_Vector128_op_Equality:
         {
-            LowerHWIntrinsicCmpOp(node, GT_EQ);
-            return;
+            return LowerHWIntrinsicCmpOp(node, GT_EQ);
         }
 
         case NI_Vector64_op_Inequality:
         case NI_Vector128_op_Inequality:
         {
-            LowerHWIntrinsicCmpOp(node, GT_NE);
-            return;
+            return LowerHWIntrinsicCmpOp(node, GT_NE);
         }
 
         case NI_AdvSimd_FusedMultiplyAddScalar:
@@ -976,6 +970,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
     }
 
     ContainCheckHWIntrinsic(node);
+    return node->gtNext;
 }
 
 //----------------------------------------------------------------------------------------------
@@ -1051,7 +1046,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node)
 //     node  - The hardware intrinsic node.
 //     cmpOp - The comparison operation, currently must be GT_EQ or GT_NE
 //
-void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
+GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
@@ -1121,7 +1116,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
         node->gtType = TYP_VOID;
         node->ClearUnusedValue();
         LowerNode(node);
-        return;
+        return node->gtNext;
     }
 
     NamedIntrinsic cmpIntrinsic;
@@ -1211,6 +1206,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
     node->ClearUnusedValue();
 
     LowerNode(node);
+    return node->gtNext;
 }
 
 //----------------------------------------------------------------------------------------------
@@ -1226,14 +1222,14 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     var_types      simdType        = node->TypeGet();
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
     var_types      simdBaseType    = node->GetSimdBaseType();
     unsigned       simdSize        = node->GetSimdSize();
-    VectorConstant vecCns          = {};
+    simd32_t       simd32Val       = {};
 
     if ((simdSize == 8) && (simdType == TYP_DOUBLE))
     {
@@ -1246,85 +1242,46 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
     assert(varTypeIsArithmetic(simdBaseType));
     assert(simdSize != 0);
 
-    size_t argCnt    = node->GetOperandCount();
-    size_t cnsArgCnt = 0;
-
-    // These intrinsics are meant to set the same value to every element.
-    if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType))
-    {
-        // Now assign the rest of the arguments.
-        for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
-        {
-            HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType);
-        }
-
-        cnsArgCnt = 1;
-    }
-    else
-    {
-        for (unsigned i = 1; i <= argCnt; i++)
-        {
-            if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType))
-            {
-                cnsArgCnt++;
-            }
-        }
-    }
-    assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+    bool   isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+    size_t argCnt     = node->GetOperandCount();
 
     // Check if we have a cast that we can remove. Note that "IsValidConstForMovImm"
     // will reset Op(1) if it finds such a cast, so we do not need to handle it here.
     // TODO-Casts: why are casts from constants checked for here?
-    if ((argCnt == cnsArgCnt) && (argCnt == 1) && IsValidConstForMovImm(node))
+    if (isConstant && (argCnt == 1) && IsValidConstForMovImm(node))
     {
-        // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector
+        // Set isConstant to false so we get lowered to a DuplicateToVector
         // intrinsic, which will itself mark the node as contained.
-        cnsArgCnt = 0;
+        isConstant = false;
     }
 
-    if (argCnt == cnsArgCnt)
+    if (isConstant)
     {
+        assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16));
+
         for (GenTree* arg : node->Operands())
         {
             BlockRange().Remove(arg);
         }
 
-        assert((simdSize == 8) || (simdSize == 16));
+        GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType);
 
-        if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8))
-        {
-            // If we are a single constant or if all parts are the same, we might be able to optimize
-            // this even further for certain values, such as Zero or AllBitsSet.
+        vecCon->gtSimd32Val = simd32Val;
+        BlockRange().InsertBefore(node, vecCon);
 
-            if (vecCns.i64[0] == 0)
-            {
-                node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero);
-                return;
-            }
-            else if (vecCns.i64[0] == -1)
-            {
-                node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet);
-                return;
-            }
+        LIR::Use use;
+        if (BlockRange().TryGetUse(node, &use))
+        {
+            use.ReplaceWith(vecCon);
+        }
+        else
+        {
+            vecCon->SetUnusedValue();
         }
 
-        unsigned  cnsSize  = (simdSize == 12) ? 16 : simdSize;
-        unsigned  cnsAlign = cnsSize;
-        var_types dataType = Compiler::getSIMDTypeForSize(simdSize);
-
-        UNATIVE_OFFSET       cnum       = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType);
-        CORINFO_FIELD_HANDLE hnd        = comp->eeFindJitDataOffs(cnum);
-        GenTree*             clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd);
-        BlockRange().InsertBefore(node, clsVarAddr);
-
-        node->ChangeOper(GT_IND);
-        node->AsOp()->gtOp1 = clsVarAddr;
-
-        // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial
-        // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just
-        // insert the non-constant values which should still allow some gains.
+        BlockRange().Remove(node);
 
-        return;
+        return LowerNode(vecCon);
     }
     else if (argCnt == 1)
     {
@@ -1349,7 +1306,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             node->ChangeHWIntrinsicId((simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64
                                                               : NI_AdvSimd_DuplicateToVector128);
         }
-        return;
+
+        return LowerNode(node);
     }
 
     // We have the following (where simd is simd8 or simd16):
@@ -1411,6 +1369,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
     BlockRange().InsertBefore(opN, idx);
 
     node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN);
+
+    return LowerNode(node);
 }
 
 //----------------------------------------------------------------------------------------------
@@ -1419,7 +1379,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
@@ -1697,8 +1657,9 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
     //   return tmp2.ToScalar();
 
     node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
+
     LowerNode(node);
-    return;
+    return node->gtNext;
 }
 #endif // FEATURE_HW_INTRINSICS
 
@@ -1962,16 +1923,12 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const
     const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc);
 
 #ifdef FEATURE_SIMD
-    if (varTypeIsSIMD(storeLoc))
+    if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12))
     {
         // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR.
-        if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister)
+        if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister)
         {
             MakeSrcContained(storeLoc, op1);
-            if (op1->IsSIMDZero())
-            {
-                MakeSrcContained(op1, op1->AsSIMD()->Op(1));
-            }
         }
         return;
     }
index 78ac528..7a2cde0 100644 (file)
@@ -733,17 +733,13 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const
     const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc);
 
 #ifdef FEATURE_SIMD
-    if (varTypeIsSIMD(storeLoc))
+    if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12))
     {
         // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR.
-        if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister)
+        if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister)
         {
             // For an InitBlk we want op1 to be contained
             MakeSrcContained(storeLoc, op1);
-            if (op1->IsSIMDZero())
-            {
-                MakeSrcContained(op1, op1->gtGetOp1());
-            }
         }
         return;
     }
index e47878a..46990d9 100644 (file)
@@ -918,7 +918,7 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 {
     if (node->TypeGet() == TYP_SIMD12)
     {
@@ -948,17 +948,13 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
             // it into 2x Vector128.Create intrinsics which themselves are also lowered into other
             // intrinsics that are not Vector*.Create
 
-            LowerHWIntrinsicCreate(node);
-            assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId));
-            LowerNode(node);
-            return;
+            return LowerHWIntrinsicCreate(node);
         }
 
         case NI_Vector128_Dot:
         case NI_Vector256_Dot:
         {
-            LowerHWIntrinsicDot(node);
-            return;
+            return LowerHWIntrinsicDot(node);
         }
 
         case NI_Vector128_GetElement:
@@ -976,30 +972,28 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                 // However, certain types may not have a direct equivalent
                 // in which case we specially handle them directly as GetElement
                 // and want to do the relevant containment checks.
-                break;
+                ContainCheckHWIntrinsic(node);
             }
-            return;
+
+            return node->gtNext;
         }
 
         case NI_Vector128_WithElement:
         case NI_Vector256_WithElement:
         {
-            LowerHWIntrinsicWithElement(node);
-            return;
+            return LowerHWIntrinsicWithElement(node);
         }
 
         case NI_Vector128_op_Equality:
         case NI_Vector256_op_Equality:
         {
-            LowerHWIntrinsicCmpOp(node, GT_EQ);
-            return;
+            return LowerHWIntrinsicCmpOp(node, GT_EQ);
         }
 
         case NI_Vector128_op_Inequality:
         case NI_Vector256_op_Inequality:
         {
-            LowerHWIntrinsicCmpOp(node, GT_NE);
-            return;
+            return LowerHWIntrinsicCmpOp(node, GT_NE);
         }
 
         case NI_Vector128_ToScalar:
@@ -1217,6 +1211,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
     }
 
     ContainCheckHWIntrinsic(node);
+    return node->gtNext;
 }
 
 //----------------------------------------------------------------------------------------------
@@ -1226,7 +1221,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 //     node  - The hardware intrinsic node.
 //     cmpOp - The comparison operation, currently must be GT_EQ or GT_NE
 //
-void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
+GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
@@ -1253,7 +1248,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
 
     GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE;
 
-    if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+    if (!varTypeIsFloating(simdBaseType) && op2->IsVectorZero() &&
+        comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
     {
         // On SSE4.1 or higher we can optimize comparisons against zero to
         // just use PTEST. We can't support it for floating-point, however,
@@ -1262,14 +1258,6 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
         node->Op(1) = op1;
         BlockRange().Remove(op2);
 
-        if (op2->AsMultiOp()->GetOperandCount() == 1)
-        {
-            // Some zero vectors are Create/Initialization nodes with a constant zero operand
-            // We should also remove this to avoid dead code
-            assert(op2->AsMultiOp()->Op(1)->IsIntegralConst(0));
-            BlockRange().Remove(op2->AsMultiOp()->Op(1));
-        }
-
         LIR::Use op1Use(BlockRange(), &node->Op(1), node);
         ReplaceWithLclVar(op1Use);
         op1 = node->Op(1);
@@ -1291,7 +1279,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
             LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd);
         }
 
-        return;
+        return node->gtNext;
     }
 
     NamedIntrinsic cmpIntrinsic;
@@ -1457,7 +1445,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
     node->gtType = TYP_VOID;
     node->ClearUnusedValue();
 
-    LowerNode(node);
+    return LowerNode(node);
 }
 
 //----------------------------------------------------------------------------------------------
@@ -1605,14 +1593,14 @@ void Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     var_types      simdType        = node->gtType;
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
     var_types      simdBaseType    = node->GetSimdBaseType();
     unsigned       simdSize        = node->GetSimdSize();
-    VectorConstant vecCns          = {};
+    simd32_t       simd32Val       = {};
 
     if ((simdSize == 8) && (simdType == TYP_DOUBLE))
     {
@@ -1634,34 +1622,13 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
     GenTree* tmp2 = nullptr;
     GenTree* tmp3 = nullptr;
 
-    size_t argCnt    = node->GetOperandCount();
-    size_t cnsArgCnt = 0;
+    bool   isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+    size_t argCnt     = node->GetOperandCount();
 
-    // These intrinsics are meant to set the same value to every element.
-    if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType))
-    {
-        // Now assign the rest of the arguments.
-        for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
-        {
-            HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType);
-        }
-
-        cnsArgCnt = 1;
-    }
-    else
+    if (isConstant)
     {
-        for (unsigned i = 1; i <= argCnt; i++)
-        {
-            if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType))
-            {
-                cnsArgCnt++;
-            }
-        }
-    }
-    assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+        assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32));
 
-    if (argCnt == cnsArgCnt)
-    {
         for (GenTree* arg : node->Operands())
         {
 #if !defined(TARGET_64BIT)
@@ -1674,43 +1641,24 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             BlockRange().Remove(arg);
         }
 
-        assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32));
+        GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType);
 
-        if (((simdSize == 16) || (simdSize == 32)) && VectorConstantIsBroadcastedI64(vecCns, simdSize / 8))
-        {
-            // If we are a single constant or if all parts are the same, we might be able to optimize
-            // this even further for certain values, such as Zero or AllBitsSet.
+        vecCon->gtSimd32Val = simd32Val;
+        BlockRange().InsertBefore(node, vecCon);
 
-            if (vecCns.i64[0] == 0)
-            {
-                node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_Zero : NI_Vector256_get_Zero);
-                return;
-            }
-            else if (vecCns.i64[0] == -1)
-            {
-                node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_AllBitsSet : NI_Vector256_get_AllBitsSet);
-                return;
-            }
+        LIR::Use use;
+        if (BlockRange().TryGetUse(node, &use))
+        {
+            use.ReplaceWith(vecCon);
+        }
+        else
+        {
+            vecCon->SetUnusedValue();
         }
 
-        unsigned cnsSize = (simdSize != 12) ? simdSize : 16;
-        unsigned cnsAlign =
-            (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : emitter::dataSection::MIN_DATA_ALIGN;
-        var_types dataType = Compiler::getSIMDTypeForSize(simdSize);
-
-        UNATIVE_OFFSET       cnum       = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType);
-        CORINFO_FIELD_HANDLE hnd        = comp->eeFindJitDataOffs(cnum);
-        GenTree*             clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd);
-        BlockRange().InsertBefore(node, clsVarAddr);
-
-        node->ChangeOper(GT_IND);
-        node->AsOp()->gtOp1 = clsVarAddr;
-
-        // TODO-XARCH-CQ: We should be able to modify at least the paths that use Insert to trivially support partial
-        // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just
-        // insert the non-constant values which should still allow some gains.
+        BlockRange().Remove(node);
 
-        return;
+        return LowerNode(vecCon);
     }
     else if (argCnt == 1)
     {
@@ -1738,7 +1686,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                 LowerNode(tmp1);
 
                 node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1);
-                return;
+
+                return LowerNode(node);
             }
 
             assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX));
@@ -1766,9 +1715,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 
             tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector128_Create, simdBaseJitType, 16);
             BlockRange().InsertAfter(op1, tmp1);
-            LowerNode(tmp1);
 
             node->Op(1) = tmp1;
+            LowerNode(tmp1);
+
             LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
             ReplaceWithLclVar(tmp1Use);
             tmp1 = node->Op(1);
@@ -1779,13 +1729,14 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             tmp3 =
                 comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, tmp2, NI_Vector128_ToVector256Unsafe, simdBaseJitType, 16);
             BlockRange().InsertAfter(tmp2, tmp3);
-            LowerNode(tmp3);
 
             idx = comp->gtNewIconNode(0x01, TYP_INT);
             BlockRange().InsertAfter(tmp3, idx);
 
             node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, tmp3, tmp1, idx);
-            return;
+            LowerNode(tmp3);
+
+            return LowerNode(node);
         }
 
         // We will be constructing the following parts:
@@ -1813,7 +1764,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   return Avx2.BroadcastScalarToVector128(tmp1);
 
             node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1);
-            return;
+
+            return LowerNode(node);
         }
 
         switch (simdBaseType)
@@ -1825,7 +1777,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                 {
                     // We will be constructing the following parts:
                     //   ...
-                    //   tmp2 =    HWINTRINSIC   simd16 ubyte get_Zero
+                    //   tmp2 =    CNS_VEC   simd16 0
                     //         /--*  tmp1 simd16
                     //         +--*  tmp2 simd16
                     //   node = *  HWINTRINSIC   simd16 ubyte Shuffle
@@ -1835,8 +1787,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                     //   var tmp2 = Vector128<byte>.Zero;
                     //   return Ssse3.Shuffle(tmp1, tmp2);
 
-                    tmp2 =
-                        comp->gtNewSimdHWIntrinsicNode(simdType, NI_Vector128_get_Zero, CORINFO_TYPE_UBYTE, simdSize);
+                    tmp2 = comp->gtNewZeroConNode(simdType, simdBaseJitType);
                     BlockRange().InsertAfter(tmp1, tmp2);
                     LowerNode(tmp2);
 
@@ -2084,7 +2035,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             }
         }
 
-        return;
+        return LowerNode(node);
     }
 
     GenTree* op2 = node->Op(2);
@@ -2137,19 +2088,21 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
         GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt,
                                                      NI_Vector128_Create, simdBaseJitType, 16);
         BlockRange().InsertAfter(node->Op(halfArgCnt), lo);
-        LowerNode(lo);
 
         GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt,
                                                      NI_Vector128_Create, simdBaseJitType, 16);
         BlockRange().InsertAfter(node->Op(argCnt), hi);
-        LowerNode(hi);
 
         idx = comp->gtNewIconNode(0x01, TYP_INT);
         BlockRange().InsertAfter(hi, idx);
 
         assert(argCnt >= 3);
         node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, lo, hi, idx);
-        return;
+
+        LowerNode(lo);
+        LowerNode(hi);
+
+        return LowerNode(node);
     }
 
     // We will be constructing the following parts:
@@ -2564,6 +2517,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             unreached();
         }
     }
+
+    return LowerNode(node);
 }
 
 //----------------------------------------------------------------------------------------------
@@ -2815,7 +2770,7 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     var_types      simdType        = node->TypeGet();
@@ -3151,6 +3106,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
         assert(node != result);
         LowerNode(node);
     }
+
+    return node->gtNext;
 }
 
 //----------------------------------------------------------------------------------------------
@@ -3159,7 +3116,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
 //  Arguments:
 //     node - The hardware intrinsic node.
 //
-void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
     CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
@@ -3266,8 +3223,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
                 node->SetSimdSize(16);
 
                 node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
-                LowerNode(node);
-                return;
+
+                return LowerNode(node);
             }
 
             case TYP_DOUBLE:
@@ -3355,8 +3312,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
                     LowerNode(tmp3);
 
                     node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
-                    LowerNode(node);
-                    return;
+
+                    return LowerNode(node);
                 }
 
                 multiply      = NI_SSE_Multiply;
@@ -3396,8 +3353,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
                     LowerNode(tmp3);
 
                     node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
-                    LowerNode(node);
-                    return;
+
+                    return LowerNode(node);
                 }
 
                 multiply      = NI_SSE2_Multiply;
@@ -3464,10 +3421,11 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
             tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, cns0, cns1, cns2, cns3, NI_Vector128_Create,
                                                   CORINFO_TYPE_INT, 16);
             BlockRange().InsertAfter(cns3, tmp1);
-            LowerNode(tmp1);
 
             op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, tmp1, NI_SSE_And, simdBaseJitType, simdSize);
             BlockRange().InsertAfter(tmp1, op1);
+
+            LowerNode(tmp1);
             LowerNode(op1);
         }
     }
@@ -3751,7 +3709,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
     //   return tmp1.ToScalar();
 
     node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp1);
-    LowerNode(node);
+
+    return LowerNode(node);
 }
 
 //----------------------------------------------------------------------------------------------
@@ -5036,16 +4995,6 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const
     if (varTypeIsSIMD(storeLoc))
     {
         assert(!op1->IsCnsIntOrI());
-        if (storeLoc->TypeIs(TYP_SIMD12) && op1->IsSIMDZero() && varDsc->lvDoNotEnregister)
-        {
-            // For a SIMD12 store we can zero from integer registers more easily.
-            MakeSrcContained(storeLoc, op1);
-            GenTree* constNode = op1->gtGetOp1();
-            assert(constNode->OperIsConst());
-            constNode->ClearContained();
-            constNode->gtType = TYP_INT;
-            constNode->SetOper(GT_CNS_INT);
-        }
         return;
     }
 #endif // FEATURE_SIMD
@@ -5568,7 +5517,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
             }
             else
 #endif // !TARGET_64BIT
-                if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
+                if (op1->IsFloatPositiveZero() || op1->IsIntegralConst(0) ||
                     (varTypeIsIntegral(simdNode->GetSimdBaseType()) && op1->IsIntegralConst(-1)))
             {
                 MakeSrcContained(simdNode, op1);
@@ -6071,6 +6020,11 @@ bool Lowering::TryGetContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode
                 //
                 canBeContained = true;
             }
+            else if (node->IsCnsVec())
+            {
+                GenTreeVecCon* vecCon = node->AsVecCon();
+                canBeContained        = !vecCon->IsAllBitsSet() && !vecCon->IsZero();
+            }
         }
 
         return canBeContained;
index d0961b2..21134d2 100644 (file)
@@ -168,6 +168,29 @@ int LinearScan::BuildNode(GenTree* tree)
         }
         break;
 
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            if (vecCon->IsAllBitsSet() || vecCon->IsZero())
+            {
+                // Directly encode constant to instructions.
+            }
+            else
+            {
+                // Reserve int to load constant from memory (IF_LARGELDC)
+                buildInternalIntRegisterDefForNode(tree);
+                buildInternalRegisterUses();
+            }
+
+            srcCount = 0;
+            assert(dstCount == 1);
+
+            RefPosition* def               = BuildDef(tree);
+            def->getInterval()->isConstant = true;
+            break;
+        }
+
         case GT_BOX:
         case GT_COMMA:
         case GT_QMARK:
index e97b749..9c29ca4 100644 (file)
@@ -3481,7 +3481,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
 
 // First, define internal registers.
 #ifdef FEATURE_SIMD
-    if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
+    if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12))
     {
         // Need an additional register to extract upper 4 bytes of Vector3,
         // it has to be float for x86.
@@ -3541,20 +3541,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
 #endif // !TARGET_64BIT
     else if (op1->isContained())
     {
-#ifdef TARGET_XARCH
-        if (varTypeIsSIMD(storeLoc))
-        {
-            // This is the zero-init case, and we need a register to hold the zero.
-            // (On Arm64 we can just store REG_ZR.)
-            assert(op1->IsSIMDZero());
-            singleUseRef = BuildUse(op1->gtGetOp1());
-            srcCount     = 1;
-        }
-        else
-#endif
-        {
-            srcCount = 0;
-        }
+        srcCount = 0;
     }
     else
     {
index 519a3e7..d6bb312 100644 (file)
@@ -935,10 +935,20 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
 #if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
                 else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
                 {
+                    GenTree* nodeToCheck = argx;
+
+                    if (nodeToCheck->OperIs(GT_OBJ))
+                    {
+                        nodeToCheck = nodeToCheck->AsObj()->gtOp1;
+
+                        if (nodeToCheck->OperIs(GT_ADDR))
+                        {
+                            nodeToCheck = nodeToCheck->AsOp()->gtOp1;
+                        }
+                    }
+
                     // SIMD types do not need the optimization below due to their sizes
-                    if (argx->OperIsSimdOrHWintrinsic() ||
-                        (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
-                         argx->AsObj()->gtOp1->AsOp()->gtOp1->OperIsSimdOrHWintrinsic()))
+                    if (nodeToCheck->OperIsSimdOrHWintrinsic() || nodeToCheck->IsCnsVec())
                     {
                         SetNeedsTemp(&arg);
                     }
@@ -9016,7 +9026,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
         return nullptr;
     }
 
-    if (src->IsCall() || src->OperIsSIMD())
+    if (src->IsCall() || src->OperIsSimdOrHWintrinsic() || src->IsCnsVec())
     {
         // Can't take ADDR from these nodes, let fgMorphCopyBlock handle it, #11413.
         return nullptr;
@@ -9268,7 +9278,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
                 noway_assert(src->IsIntegralConst(0));
                 noway_assert(destVarDsc != nullptr);
 
-                src = gtNewSIMDNode(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size);
+                src = gtNewZeroConNode(asgType, CORINFO_TYPE_FLOAT);
             }
             else
 #endif
@@ -9854,6 +9864,13 @@ GenTree* Compiler::getSIMDStructFromField(GenTree*     tree,
                 *simdBaseJitTypeOut          = simdNode->GetSimdBaseJitType();
             }
 #endif // FEATURE_HW_INTRINSICS
+            else if (obj->IsCnsVec())
+            {
+                ret                   = obj;
+                GenTreeVecCon* vecCon = obj->AsVecCon();
+                *simdSizeOut          = vecCon->GetSimdSize();
+                *simdBaseJitTypeOut   = vecCon->GetSimdBaseJitType();
+            }
         }
     }
     if (ret != nullptr)
@@ -12694,56 +12711,20 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
         return node;
     }
 
-    switch (node->GetHWIntrinsicId())
+    simd32_t simd32Val = {};
+
+    if (GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val))
     {
-        case NI_Vector128_Create:
-#if defined(TARGET_XARCH)
-        case NI_Vector256_Create:
-#elif defined(TARGET_ARM64)
-        case NI_Vector64_Create:
-#endif
-        {
-            bool hwAllArgsAreConstZero = true;
-            for (GenTree* arg : node->Operands())
-            {
-                if (!arg->IsIntegralConst(0) && !arg->IsFloatPositiveZero())
-                {
-                    hwAllArgsAreConstZero = false;
-                    break;
-                }
-            }
+        GenTreeVecCon* vecCon = gtNewVconNode(node->TypeGet(), node->GetSimdBaseJitType());
 
-            if (hwAllArgsAreConstZero)
-            {
-                switch (node->GetHWIntrinsicId())
-                {
-                    case NI_Vector128_Create:
-                    {
-                        node->ResetHWIntrinsicId(NI_Vector128_get_Zero);
-                        break;
-                    }
-#if defined(TARGET_XARCH)
-                    case NI_Vector256_Create:
-                    {
-                        node->ResetHWIntrinsicId(NI_Vector256_get_Zero);
-                        break;
-                    }
-#elif defined(TARGET_ARM64)
-                    case NI_Vector64_Create:
-                    {
-                        node->ResetHWIntrinsicId(NI_Vector64_get_Zero);
-                        break;
-                    }
-#endif
-                    default:
-                        unreached();
-                }
-            }
-            break;
+        for (GenTree* arg : node->Operands())
+        {
+            DEBUG_DESTROY_NODE(arg);
         }
 
-        default:
-            break;
+        vecCon->gtSimd32Val = simd32Val;
+        INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+        return vecCon;
     }
 
     return node;
@@ -13760,7 +13741,7 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp)
                 GenTree* op2 = hw->Op(2);
                 if (!gtIsActiveCSE_Candidate(hw))
                 {
-                    if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2))
+                    if (op2->IsVectorZero() && !gtIsActiveCSE_Candidate(op2))
                     {
                         DEBUG_DESTROY_NODE(hw);
                         DEBUG_DESTROY_NODE(op2);
index d2b1955..539a86b 100644 (file)
@@ -340,15 +340,10 @@ void MorphInitBlockHelper::MorphStructCases()
         m_result->gtFlags |= (m_dst->gtFlags & GTF_ALL_EFFECT);
 
 #if FEATURE_SIMD
-        if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode))
+        if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode) && m_src->IsIntegralConst(0))
         {
-            // For a SIMD local init we need to call SIMDIntrinsic init.
-            // We need this block becuase morph does not create SIMD init for promoted lclVars.
-            assert(m_src->IsIntegralConst(0) || m_src->IsFPZero());
             assert(m_dstVarDsc != nullptr);
-            const var_types asgType         = m_asg->TypeGet();
-            CorInfoType     simdBaseJitType = m_dstVarDsc->GetSimdBaseJitType();
-            m_src = m_comp->gtNewSIMDNode(asgType, m_src, SIMDIntrinsicInit, simdBaseJitType, m_blockSize);
+            m_src                   = m_comp->gtNewZeroConNode(m_asg->TypeGet(), CORINFO_TYPE_FLOAT);
             m_result->AsOp()->gtOp2 = m_src;
         }
 #endif // FEATURE_SIMD
index ab5d141..af04a6b 100644 (file)
@@ -3577,6 +3577,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
         case GT_CNS_INT:
         case GT_CNS_DBL:
         case GT_CNS_STR:
+        case GT_CNS_VEC:
             return true; // We reach here only when CSE_CONSTS is enabled
 
         case GT_ARR_ELEM:
index a850aa5..27462c0 100644 (file)
@@ -134,17 +134,22 @@ void Rationalizer::RewriteSIMDIndir(LIR::Use& use)
         addr->gtType = simdType;
         use.ReplaceWith(addr);
     }
-    else if (addr->OperIs(GT_ADDR) && addr->AsUnOp()->gtGetOp1()->OperIsSimdOrHWintrinsic())
+    else if (addr->OperIs(GT_ADDR))
     {
-        // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node.
-        // This is a special tree created by impNormStructVal to preserve the class layout
-        // needed by call morphing on an OBJ node. This information is no longer needed at
-        // this point (and the address of a SIMD node can't be obtained anyway).
+        GenTree* location = addr->AsUnOp()->gtGetOp1();
 
-        BlockRange().Remove(indir);
-        BlockRange().Remove(addr);
+        if (location->OperIsSimdOrHWintrinsic() || location->IsCnsVec())
+        {
+            // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node.
+            // This is a special tree created by impNormStructVal to preserve the class layout
+            // needed by call morphing on an OBJ node. This information is no longer needed at
+            // this point (and the address of a SIMD node can't be obtained anyway).
+
+            BlockRange().Remove(indir);
+            BlockRange().Remove(addr);
 
-        use.ReplaceWith(addr->AsUnOp()->gtGetOp1());
+            use.ReplaceWith(addr->AsUnOp()->gtGetOp1());
+        }
     }
 #endif // FEATURE_SIMD
 }
@@ -376,20 +381,35 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
         {
             if (location->OperIs(GT_LCL_VAR))
             {
-                var_types   simdType        = location->TypeGet();
-                GenTree*    initVal         = assignment->AsOp()->gtOp2;
+                var_types simdType = location->TypeGet();
+                GenTree*  initVal  = assignment->AsOp()->gtOp2;
+
                 CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(location);
                 if (simdBaseJitType == CORINFO_TYPE_UNDEF)
                 {
                     // Lie about the type if we don't know/have it.
                     simdBaseJitType = CORINFO_TYPE_FLOAT;
                 }
-                GenTreeSIMD* simdTree =
-                    comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, genTypeSize(simdType));
-                assignment->gtOp2 = simdTree;
-                value             = simdTree;
 
-                BlockRange().InsertAfter(initVal, simdTree);
+                if (initVal->IsIntegralConst(0))
+                {
+                    GenTree* zeroCon = comp->gtNewZeroConNode(simdType, simdBaseJitType);
+
+                    assignment->gtOp2 = zeroCon;
+                    value             = zeroCon;
+
+                    BlockRange().InsertAfter(initVal, zeroCon);
+                    BlockRange().Remove(initVal);
+                }
+                else
+                {
+                    GenTreeSIMD* simdTree = comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType,
+                                                                genTypeSize(simdType));
+                    assignment->gtOp2 = simdTree;
+                    value             = simdTree;
+
+                    BlockRange().InsertAfter(initVal, simdTree);
+                }
             }
         }
 #endif // FEATURE_SIMD
@@ -733,6 +753,22 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge
         }
 #endif // FEATURE_HW_INTRINSICS
 
+#if defined(FEATURE_SIMD)
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = node->AsVecCon();
+
+            // TODO-1stClassStructs: do not retype SIMD nodes
+
+            if ((vecCon->TypeIs(TYP_I_IMPL)) && (vecCon->GetSimdSize() == TARGET_POINTER_SIZE))
+            {
+                assert(genTypeSize(vecCon->GetSimdBaseType()) == 4);
+                vecCon->gtType = TYP_SIMD8;
+            }
+            break;
+        }
+#endif // FEATURE_SIMD
+
         default:
             // Check that we don't have nodes not allowed in HIR here.
             assert((node->DebugOperKind() & DBK_NOTHIR) == 0);
index 8388f1e..f59c09b 100644 (file)
@@ -36,6 +36,112 @@ enum SIMDLevel
 #endif
 };
 
+struct simd8_t // Raw 8-byte value exposed through per-lane-type array views; presumably the payload of an 8-byte SIMD constant (confirm against users).
+{
+    union { // Anonymous union: every array below overlays the same storage, one view per lane type.
+        float    f32[2];
+        double   f64[1];
+        int8_t   i8[8];
+        int16_t  i16[4];
+        int32_t  i32[2];
+        int64_t  i64[1];
+        uint8_t  u8[8];
+        uint16_t u16[4];
+        uint32_t u32[2];
+        uint64_t u64[1];
+    };
+
+    bool operator==(const simd8_t& other) const // Equality on the stored bits, not on floating-point values.
+    {
+        return (u64[0] == other.u64[0]); // One 64-bit word covers the whole value.
+    }
+
+    bool operator!=(const simd8_t& other) const // Logical negation of operator== (same single-word comparison).
+    {
+        return (u64[0] != other.u64[0]);
+    }
+};
+
+struct simd12_t // Raw 12-byte value exposed through per-lane-type array views; note there are no 64-bit (f64/i64/u64) views.
+{
+    union { // Anonymous union: every array below overlays the same storage, one view per lane type.
+        float    f32[3];
+        int8_t   i8[12];
+        int16_t  i16[6];
+        int32_t  i32[3];
+        uint8_t  u8[12];
+        uint16_t u16[6];
+        uint32_t u32[3];
+    };
+
+    bool operator==(const simd12_t& other) const // Equality on the stored bits, not on floating-point values.
+    {
+        return (u32[0] == other.u32[0]) && (u32[1] == other.u32[1]) && (u32[2] == other.u32[2]); // Three 32-bit words cover all 12 bytes.
+    }
+
+    bool operator!=(const simd12_t& other) const // Logical negation of operator==: true if any 32-bit word differs.
+    {
+        return (u32[0] != other.u32[0]) || (u32[1] != other.u32[1]) || (u32[2] != other.u32[2]);
+    }
+};
+
+struct simd16_t // Raw 16-byte value exposed through per-lane-type array views, plus a view as two simd8_t halves.
+{
+    union { // Anonymous union: every array below overlays the same storage, one view per lane type.
+        float    f32[4];
+        double   f64[2];
+        int8_t   i8[16];
+        int16_t  i16[8];
+        int32_t  i32[4];
+        int64_t  i64[2];
+        uint8_t  u8[16];
+        uint16_t u16[8];
+        uint32_t u32[4];
+        uint64_t u64[2];
+        simd8_t  v64[2]; // The same bytes viewed as two 8-byte simd8_t values.
+    };
+
+    bool operator==(const simd16_t& other) const // Equality on the stored bits, not on floating-point values.
+    {
+        return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]); // Two 64-bit words cover all 16 bytes.
+    }
+
+    bool operator!=(const simd16_t& other) const // Logical negation of operator==: true if either 64-bit word differs.
+    {
+        return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]);
+    }
+};
+
+struct simd32_t // Raw 32-byte value exposed through per-lane-type array views; a simd32_t local is assigned to GenTreeVecCon::gtSimd32Val elsewhere in this patch.
+{
+    union { // Anonymous union: every array below overlays the same storage, one view per lane type.
+        float    f32[8];
+        double   f64[4];
+        int8_t   i8[32];
+        int16_t  i16[16];
+        int32_t  i32[8];
+        int64_t  i64[4];
+        uint8_t  u8[32];
+        uint16_t u16[16];
+        uint32_t u32[8];
+        uint64_t u64[4];
+        simd8_t  v64[4]; // The same bytes viewed as four 8-byte simd8_t values.
+        simd16_t v128[2]; // The same bytes viewed as two 16-byte simd16_t values.
+    };
+
+    bool operator==(const simd32_t& other) const // Equality on the stored bits, not on floating-point values.
+    {
+        return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]) && (u64[2] == other.u64[2]) && // Four 64-bit words
+               (u64[3] == other.u64[3]);                                                          // cover all 32 bytes.
+    }
+
+    bool operator!=(const simd32_t& other) const // Logical negation of operator==: true if any 64-bit word differs.
+    {
+        return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]) || (u64[2] != other.u64[2]) ||
+               (u64[3] != other.u64[3]);
+    }
+};
+
 #ifdef FEATURE_SIMD
 
 #ifdef DEBUG
index ab12655..9dee43e 100644 (file)
@@ -538,36 +538,86 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
             switch (intrinsic)
             {
 #if defined(TARGET_XARCH)
+                case NI_VectorT128_get_AllBitsSet:
+                case NI_VectorT256_get_AllBitsSet:
+                {
+                    return gtNewAllBitsSetConNode(retType, simdBaseJitType);
+                }
+
+                case NI_VectorT128_get_Count:
+                case NI_VectorT256_get_Count:
+                {
+                    GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
+                    countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
+                    return countNode;
+                }
+
                 case NI_Vector2_get_One:
                 case NI_Vector3_get_One:
                 case NI_Vector4_get_One:
                 case NI_VectorT128_get_One:
                 case NI_VectorT256_get_One:
                 {
+                    GenTreeVecCon* vecCon     = gtNewVconNode(retType, simdBaseJitType);
+                    uint32_t       simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+
                     switch (simdBaseType)
                     {
                         case TYP_BYTE:
                         case TYP_UBYTE:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.u8[index] = 1;
+                            }
+                            break;
+                        }
+
                         case TYP_SHORT:
                         case TYP_USHORT:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.u16[index] = 1;
+                            }
+                            break;
+                        }
+
                         case TYP_INT:
                         case TYP_UINT:
                         {
-                            op1 = gtNewIconNode(1, TYP_INT);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.u32[index] = 1;
+                            }
                             break;
                         }
 
                         case TYP_LONG:
                         case TYP_ULONG:
                         {
-                            op1 = gtNewLconNode(1);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.u64[index] = 1;
+                            }
                             break;
                         }
 
                         case TYP_FLOAT:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.f32[index] = 1.0f;
+                            }
+                            break;
+                        }
+
                         case TYP_DOUBLE:
                         {
-                            op1 = gtNewDconNode(1.0, simdBaseType);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd32Val.f64[index] = 1.0;
+                            }
                             break;
                         }
 
@@ -577,47 +627,95 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                         }
                     }
 
-                    return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize,
-                                                        /* isSimdAsHWIntrinsic */ true);
+                    return vecCon;
+                }
+
+                case NI_Vector2_get_Zero:
+                case NI_Vector3_get_Zero:
+                case NI_Vector4_get_Zero:
+                case NI_VectorT128_get_Zero:
+                case NI_VectorT256_get_Zero:
+                {
+                    return gtNewZeroConNode(retType, simdBaseJitType);
+                }
+#elif defined(TARGET_ARM64)
+                case NI_VectorT128_get_AllBitsSet:
+                {
+                    return gtNewAllBitsSetConNode(retType, simdBaseJitType);
                 }
 
                 case NI_VectorT128_get_Count:
-                case NI_VectorT256_get_Count:
                 {
                     GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
                     countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
                     return countNode;
                 }
-#elif defined(TARGET_ARM64)
+
                 case NI_Vector2_get_One:
                 case NI_Vector3_get_One:
                 case NI_Vector4_get_One:
                 case NI_VectorT128_get_One:
                 {
+                    GenTreeVecCon* vecCon     = gtNewVconNode(retType, simdBaseJitType);
+                    uint32_t       simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+
                     switch (simdBaseType)
                     {
                         case TYP_BYTE:
                         case TYP_UBYTE:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.u8[index] = 1;
+                            }
+                            break;
+                        }
+
                         case TYP_SHORT:
                         case TYP_USHORT:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.u16[index] = 1;
+                            }
+                            break;
+                        }
+
                         case TYP_INT:
                         case TYP_UINT:
                         {
-                            op1 = gtNewIconNode(1, TYP_INT);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.u32[index] = 1;
+                            }
                             break;
                         }
 
                         case TYP_LONG:
                         case TYP_ULONG:
                         {
-                            op1 = gtNewLconNode(1);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.u64[index] = 1;
+                            }
                             break;
                         }
 
                         case TYP_FLOAT:
+                        {
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.f32[index] = 1.0f;
+                            }
+                            break;
+                        }
+
                         case TYP_DOUBLE:
                         {
-                            op1 = gtNewDconNode(1.0, simdBaseType);
+                            for (uint32_t index = 0; index < simdLength; index++)
+                            {
+                                vecCon->gtSimd16Val.f64[index] = 1.0;
+                            }
                             break;
                         }
 
@@ -627,15 +725,15 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                         }
                     }
 
-                    return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize,
-                                                        /* isSimdAsHWIntrinsic */ true);
+                    return vecCon;
                 }
 
-                case NI_VectorT128_get_Count:
+                case NI_Vector2_get_Zero:
+                case NI_Vector3_get_Zero:
+                case NI_Vector4_get_Zero:
+                case NI_VectorT128_get_Zero:
                 {
-                    GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
-                    countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
-                    return countNode;
+                    return gtNewZeroConNode(retType, simdBaseJitType);
                 }
 #else
 #error Unsupported platform
index 848889b..78ca6ab 100644 (file)
@@ -41,7 +41,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector2,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector2_CreateBroadcast,                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Dot,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector64_Dot,                                NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     get_One,                                                0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector2_get_One,                             NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector2,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector64_get_Zero,                           NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector2,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector2_get_Zero,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Max,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Min,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     op_Addition,                                            2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
@@ -61,7 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector3,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector3_CreateBroadcast,                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Dot,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector128_Dot,                               NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     get_One,                                                0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector3_get_One,                             NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector3,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector128_get_Zero,                          NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector3,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector3_get_Zero,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Max,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Min,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     op_Addition,                                            2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
@@ -81,7 +81,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector4,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector4_CreateBroadcast,                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Dot,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector128_Dot,                               NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     get_One,                                                0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector4_get_One,                             NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector4,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector128_get_Zero,                          NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector4,     get_Zero,                                               0,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Vector4_get_Zero,                            NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Max,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Max,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Min,                                                    2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Min,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     op_Addition,                                            2,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Add,                                 NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
@@ -112,11 +112,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT128,  CreateBroadcast,            ".ctor",
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Dot,                                                    2,         {NI_Vector128_Dot,                              NI_Vector128_Dot,                               NI_Vector128_Dot,                               NI_Vector128_Dot,                               NI_Vector128_Dot,                               NI_Vector128_Dot,                               NI_Illegal,                                     NI_Illegal,                                     NI_Vector128_Dot,                               NI_Vector128_Dot},                              SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Equals,                                                 2,         {NI_AdvSimd_CompareEqual,                       NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_CompareEqual,                        NI_AdvSimd_Arm64_CompareEqual,                  NI_AdvSimd_Arm64_CompareEqual,                  NI_AdvSimd_CompareEqual,                        NI_AdvSimd_Arm64_CompareEqual},                 SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Floor,                                                  1,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Floor,                               NI_AdvSimd_Arm64_Floor},                        SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_AllBitsSet,                                         0,         {NI_Vector128_get_AllBitsSet,                   NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet,                    NI_Vector128_get_AllBitsSet},                   SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_AllBitsSet,                                         0,         {NI_VectorT128_get_AllBitsSet,                  NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet,                   NI_VectorT128_get_AllBitsSet},                  SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Count,                                              0,         {NI_VectorT128_get_Count,                       NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count,                        NI_VectorT128_get_Count},                       SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Item,                                               2,         {NI_VectorT128_get_Item,                        NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item,                         NI_VectorT128_get_Item},                        SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_One,                                                0,         {NI_VectorT128_get_One,                         NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One,                          NI_VectorT128_get_One},                         SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Zero,                                               0,         {NI_Vector128_get_Zero,                         NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero,                          NI_Vector128_get_Zero},                         SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Zero,                                               0,         {NI_VectorT128_get_Zero,                        NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero,                         NI_VectorT128_get_Zero},                        SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  GreaterThan,                                            2,         {NI_AdvSimd_CompareGreaterThan,                 NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_Arm64_CompareGreaterThan,            NI_AdvSimd_Arm64_CompareGreaterThan,            NI_AdvSimd_CompareGreaterThan,                  NI_AdvSimd_Arm64_CompareGreaterThan},           SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  GreaterThanOrEqual,                                     2,         {NI_AdvSimd_CompareGreaterThanOrEqual,          NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_Arm64_CompareGreaterThanOrEqual,     NI_AdvSimd_Arm64_CompareGreaterThanOrEqual,     NI_AdvSimd_CompareGreaterThanOrEqual,           NI_AdvSimd_Arm64_CompareGreaterThanOrEqual},    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  LessThan,                                               2,         {NI_AdvSimd_CompareLessThan,                    NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_Arm64_CompareLessThan,               NI_AdvSimd_Arm64_CompareLessThan,               NI_AdvSimd_CompareLessThan,                     NI_AdvSimd_Arm64_CompareLessThan},              SimdAsHWIntrinsicFlag::None)
index e6f3134..d676bbe 100644 (file)
@@ -41,7 +41,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector2,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector2_CreateBroadcast,                 NI_Illegal},                                SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Dot,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_Dot,                           NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     get_One,                                                0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector2_get_One,                         NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector2,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_get_Zero,                      NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector2,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector2_get_Zero,                        NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Max,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     Min,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector2,     op_Addition,                                            2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
@@ -61,7 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector3,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector3_CreateBroadcast,                 NI_Illegal},                                SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Dot,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_Dot,                           NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     get_One,                                                0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector3_get_One,                         NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector3,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_get_Zero,                      NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector3,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector3_get_Zero,                        NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Max,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     Min,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3,     op_Addition,                                            2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
@@ -81,7 +81,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4,     Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector4,     CreateBroadcast,            ".ctor",                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector4_CreateBroadcast,                 NI_Illegal},                                SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Dot,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_Dot,                           NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     get_One,                                                0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector4_get_One,                         NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector4,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_get_Zero,                      NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector4,     get_Zero,                                               0,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Vector4_get_Zero,                        NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Max,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Max,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     Min,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Min,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4,     op_Addition,                                            2,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Add,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
@@ -112,11 +112,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT128,  CreateBroadcast,            ".ctor",
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Dot,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Vector128_Dot,                           NI_Vector128_Dot,                           NI_VectorT128_Dot,                          NI_VectorT128_Dot,                          NI_Illegal,                                 NI_Illegal,                                 NI_Vector128_Dot,                           NI_Vector128_Dot},                          SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Equals,                                                 2,         {NI_SSE2_CompareEqual,                      NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_SSE2_CompareEqual,                       NI_VectorT128_Equals,                       NI_VectorT128_Equals,                       NI_SSE_CompareEqual,                        NI_SSE2_CompareEqual},                      SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Floor,                                                  1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE41_Floor,                             NI_SSE41_Floor},                            SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_AllBitsSet,                                         0,         {NI_Vector128_get_AllBitsSet,               NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet,                NI_Vector128_get_AllBitsSet},               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_AllBitsSet,                                         0,         {NI_VectorT128_get_AllBitsSet,              NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet,               NI_VectorT128_get_AllBitsSet},              SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Count,                                              0,         {NI_VectorT128_get_Count,                   NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count,                    NI_VectorT128_get_Count},                   SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Item,                                               2,         {NI_VectorT128_get_Item,                    NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item,                     NI_VectorT128_get_Item},                    SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_One,                                                0,         {NI_VectorT128_get_One,                     NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One,                      NI_VectorT128_get_One},                     SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Zero,                                               0,         {NI_Vector128_get_Zero,                     NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero,                      NI_Vector128_get_Zero},                     SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  get_Zero,                                               0,         {NI_VectorT128_get_Zero,                    NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero,                     NI_VectorT128_get_Zero},                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  GreaterThan,                                            2,         {NI_SSE2_CompareGreaterThan,                NI_VectorT128_GreaterThan,                  NI_SSE2_CompareGreaterThan,                 NI_VectorT128_GreaterThan,                  NI_SSE2_CompareGreaterThan,                 NI_VectorT128_GreaterThan,                  NI_VectorT128_GreaterThan,                  NI_VectorT128_GreaterThan,                  NI_SSE_CompareGreaterThan,                  NI_SSE2_CompareGreaterThan},                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  GreaterThanOrEqual,                                     2,         {NI_VectorT128_GreaterThanOrEqual,          NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_VectorT128_GreaterThanOrEqual,           NI_SSE_CompareGreaterThanOrEqual,           NI_SSE2_CompareGreaterThanOrEqual},         SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  LessThan,                                               2,         {NI_SSE2_CompareLessThan,                   NI_VectorT128_LessThan,                     NI_SSE2_CompareLessThan,                    NI_VectorT128_LessThan,                     NI_SSE2_CompareLessThan,                    NI_VectorT128_LessThan,                     NI_VectorT128_LessThan,                     NI_VectorT128_LessThan,                     NI_SSE_CompareLessThan,                     NI_SSE2_CompareLessThan},                   SimdAsHWIntrinsicFlag::None)
@@ -162,11 +162,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT256,  CreateBroadcast,            ".ctor",
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  Dot,                                                    2,         {NI_Illegal,                                NI_Illegal,                                 NI_Vector256_Dot,                           NI_Vector256_Dot,                           NI_Vector256_Dot,                           NI_Vector256_Dot,                           NI_Illegal,                                 NI_Illegal,                                 NI_Vector256_Dot,                           NI_Vector256_Dot},                          SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  Equals,                                                 2,         {NI_AVX2_CompareEqual,                      NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX2_CompareEqual,                       NI_AVX_CompareEqual,                        NI_AVX_CompareEqual},                       SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  Floor,                                                  1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_AVX_Floor,                               NI_AVX_Floor},                              SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_AllBitsSet,                                         0,         {NI_Vector256_get_AllBitsSet,               NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet,                NI_Vector256_get_AllBitsSet},               SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_AllBitsSet,                                         0,         {NI_VectorT256_get_AllBitsSet,              NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet,               NI_VectorT256_get_AllBitsSet},              SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_Count,                                              0,         {NI_VectorT256_get_Count,                   NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count,                    NI_VectorT256_get_Count},                   SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_Item,                                               2,         {NI_VectorT256_get_Item,                    NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item,                     NI_VectorT256_get_Item},                    SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_One,                                                0,         {NI_VectorT256_get_One,                     NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One,                      NI_VectorT256_get_One},                     SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_Zero,                                               0,         {NI_Vector256_get_Zero,                     NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero,                      NI_Vector256_get_Zero},                     SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256,  get_Zero,                                               0,         {NI_VectorT256_get_Zero,                    NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero,                     NI_VectorT256_get_Zero},                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  GreaterThan,                                            2,         {NI_AVX2_CompareGreaterThan,                NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX2_CompareGreaterThan,                 NI_VectorT256_GreaterThan,                  NI_AVX_CompareGreaterThan,                  NI_AVX_CompareGreaterThan},                 SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  GreaterThanOrEqual,                                     2,         {NI_VectorT256_GreaterThanOrEqual,          NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_VectorT256_GreaterThanOrEqual,           NI_AVX_CompareGreaterThanOrEqual,           NI_AVX_CompareGreaterThanOrEqual},          SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  LessThan,                                               2,         {NI_AVX2_CompareLessThan,                   NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX2_CompareLessThan,                    NI_VectorT256_LessThan,                     NI_AVX_CompareLessThan,                     NI_AVX_CompareLessThan},                    SimdAsHWIntrinsicFlag::None)
index 5e0942d..b6f406d 100644 (file)
@@ -442,7 +442,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
 #endif // !defined(TARGET_64BIT)
         if (op1->isContained())
     {
-        if (op1->IsIntegralConst(0) || op1->IsFPZero())
+        if (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero())
         {
             genSIMDZero(targetType, baseType, targetReg);
         }
@@ -943,22 +943,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
     unsigned varNum = lclVar->GetLclNum();
     assert(varNum < compiler->lvaCount);
 
-    regNumber tmpReg = treeNode->GetSingleTempReg();
-    GenTree*  op1    = lclVar->gtOp1;
-    if (op1->isContained())
-    {
-        // This is only possible for a zero-init.
-        assert(op1->IsIntegralConst(0) || op1->IsSIMDZero());
-        genSIMDZero(TYP_SIMD16, op1->AsSIMD()->GetSimdBaseType(), tmpReg);
-
-        // store lower 8 bytes
-        GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, tmpReg, varNum, offs);
-
-        // Store upper 4 bytes
-        GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
-
-        return;
-    }
+    GenTree* op1 = lclVar->gtOp1;
 
     assert(!op1->isContained());
     regNumber operandReg = genConsumeReg(op1);
@@ -966,11 +951,18 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
     // store lower 8 bytes
     GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
 
-    // Extract upper 4-bytes from operandReg
-    GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+    if (!op1->IsVectorZero())
+    {
+        regNumber tmpReg = treeNode->GetSingleTempReg();
+
+        // Extract upper 4-bytes from operandReg
+        GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+        operandReg = tmpReg;
+    }
 
     // Store upper 4 bytes
-    GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
+    GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, operandReg, varNum, offs + 8);
 }
 
 //-----------------------------------------------------------------------------
index 8dec721..47a311e 100644 (file)
@@ -442,6 +442,12 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc)
     , m_floatCnsMap(nullptr)
     , m_doubleCnsMap(nullptr)
     , m_byrefCnsMap(nullptr)
+#if defined(FEATURE_SIMD)
+    , m_simd8CnsMap(nullptr)
+    , m_simd12CnsMap(nullptr)
+    , m_simd16CnsMap(nullptr)
+    , m_simd32CnsMap(nullptr)
+#endif // FEATURE_SIMD
     , m_VNFunc0Map(nullptr)
     , m_VNFunc1Map(nullptr)
     , m_VNFunc2Map(nullptr)
@@ -1672,6 +1678,33 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type
                     // Since this value is always the same, we represent it as a static.
                     m_defs = &s_specialRefConsts[0];
                     break; // Nothing to do.
+
+#if defined(FEATURE_SIMD)
+                case TYP_SIMD8:
+                {
+                    m_defs = new (alloc) Alloc<TYP_SIMD8>::Type[ChunkSize];
+                    break;
+                }
+
+                case TYP_SIMD12:
+                {
+                    m_defs = new (alloc) Alloc<TYP_SIMD12>::Type[ChunkSize];
+                    break;
+                }
+
+                case TYP_SIMD16:
+                {
+                    m_defs = new (alloc) Alloc<TYP_SIMD16>::Type[ChunkSize];
+                    break;
+                }
+
+                case TYP_SIMD32:
+                {
+                    m_defs = new (alloc) Alloc<TYP_SIMD32>::Type[ChunkSize];
+                    break;
+                }
+#endif // FEATURE_SIMD
+
                 default:
                     assert(false); // Should not reach here.
             }
@@ -1807,6 +1840,28 @@ ValueNum ValueNumStore::VNForByrefCon(target_size_t cnsVal)
     return VnForConst(cnsVal, GetByrefCnsMap(), TYP_BYREF);
 }
 
+#if defined(FEATURE_SIMD)
+ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8);
+}
+
+ValueNum ValueNumStore::VNForSimd12Con(simd12_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd12CnsMap(), TYP_SIMD12);
+}
+
+ValueNum ValueNumStore::VNForSimd16Con(simd16_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16);
+}
+
+ValueNum ValueNumStore::VNForSimd32Con(simd32_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32);
+}
+#endif // FEATURE_SIMD
+
 ValueNum ValueNumStore::VNForCastOper(var_types castToType, bool srcIsUnsigned)
 {
     assert(castToType != TYP_STRUCT);
@@ -1889,15 +1944,24 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ)
 
 #ifdef FEATURE_SIMD
         case TYP_SIMD8:
+        {
+            return VNForSimd8Con({});
+        }
+
         case TYP_SIMD12:
+        {
+            return VNForSimd12Con({});
+        }
+
         case TYP_SIMD16:
+        {
+            return VNForSimd16Con({});
+        }
+
         case TYP_SIMD32:
-            // We do not have the base type - a "fake" one will have to do. Note that we cannot
-            // use the HWIntrinsic "get_Zero" VNFunc here. This is because they only represent
-            // "fully zeroed" vectors, and here we may be loading one from memory, leaving upper
-            // bits undefined. So using "SIMD_Init" is "the next best thing", so to speak, and
-            // TYP_FLOAT is one of the more popular base types, so that's why we use it here.
-            return VNForFunc(typ, VNF_SIMD_Init, VNForFloatCon(0), VNForSimdType(genTypeSize(typ), CORINFO_TYPE_FLOAT));
+        {
+            return VNForSimd32Con({});
+        }
 #endif // FEATURE_SIMD
 
         // These should be unreached.
@@ -2939,6 +3003,48 @@ float ValueNumStore::GetConstantSingle(ValueNum argVN)
     return ConstantValue<float>(argVN);
 }
 
+#if defined(FEATURE_SIMD)
+// Given a simd8 constant value number return its value as a simd8.
+//
+simd8_t ValueNumStore::GetConstantSimd8(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD8);
+
+    return ConstantValue<simd8_t>(argVN);
+}
+
+// Given a simd12 constant value number return its value as a simd12.
+//
+simd12_t ValueNumStore::GetConstantSimd12(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD12);
+
+    return ConstantValue<simd12_t>(argVN);
+}
+
+// Given a simd16 constant value number return its value as a simd16.
+//
+simd16_t ValueNumStore::GetConstantSimd16(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD16);
+
+    return ConstantValue<simd16_t>(argVN);
+}
+
+// Given a simd32 constant value number return its value as a simd32.
+//
+simd32_t ValueNumStore::GetConstantSimd32(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD32);
+
+    return ConstantValue<simd32_t>(argVN);
+}
+#endif // FEATURE_SIMD
+
 // Compute the proper value number when the VNFunc has all constant arguments
 // This essentially performs constant folding at value numbering time
 //
@@ -4994,114 +5100,6 @@ bool ValueNumStore::IsVNConstant(ValueNum vn)
     }
 }
 
-//------------------------------------------------------------------------
-// IsVNVectorZero: Checks if the value number is a Vector*_get_Zero.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    true  - The value number is a Vector*_get_Zero.
-//    false - The value number is not a Vector*_get_Zero.
-bool ValueNumStore::IsVNVectorZero(ValueNum vn)
-{
-#ifdef FEATURE_SIMD
-    VNSimdTypeInfo vnInfo = GetVectorZeroSimdTypeOfVN(vn);
-    // Check the size to see if we got a valid SIMD type.
-    // '0' means it is not valid.
-    if (vnInfo.m_simdSize != 0)
-    {
-        return true;
-    }
-#endif
-    return false;
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// GetSimdTypeOfVN: Returns the SIMD type information based on the given value number.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type.
-VNSimdTypeInfo ValueNumStore::GetSimdTypeOfVN(ValueNum vn)
-{
-    VNSimdTypeInfo vnInfo;
-
-    // The SIMD type is encoded as a function,
-    // even though it is not actually a function.
-    VNFuncApp simdType;
-    if (GetVNFunc(vn, &simdType) && simdType.m_func == VNF_SimdType)
-    {
-        assert(simdType.m_arity == 2);
-        vnInfo.m_simdSize        = GetConstantInt32(simdType.m_args[0]);
-        vnInfo.m_simdBaseJitType = (CorInfoType)GetConstantInt32(simdType.m_args[1]);
-        return vnInfo;
-    }
-
-    vnInfo.m_simdSize        = 0;
-    vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-    return vnInfo;
-}
-
-//------------------------------------------------------------------------
-// GetVectorZeroSimdTypeOfVN: Returns the SIMD type information based on the given value number
-//                            if it's Vector*_get_Zero.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type
-//    for a Vector*_get_Zero value number.
-//
-// REVIEW: Vector*_get_Zero nodes in VN currently encode their SIMD type for
-//         conservative reasons. In the future, it might be possible not do this
-//         on most platforms since Vector*_get_Zero's base type does not matter.
-VNSimdTypeInfo ValueNumStore::GetVectorZeroSimdTypeOfVN(ValueNum vn)
-{
-#ifdef FEATURE_HW_INTRINSICS
-    // REVIEW: This will only return true if Vector*_get_Zero encodes
-    //         its base type as an argument. On XARCH there may be
-    //         scenarios where Vector*_get_Zero will not encode its base type;
-    //         therefore, returning false here.
-    // Vector*_get_Zero does not have any arguments,
-    // but its SIMD type is encoded as an argument.
-    VNFuncApp funcApp;
-    if (GetVNFunc(vn, &funcApp) && funcApp.m_arity == 1)
-    {
-        switch (funcApp.m_func)
-        {
-            case VNF_HWI_Vector128_get_Zero:
-#if defined(TARGET_XARCH)
-            case VNF_HWI_Vector256_get_Zero:
-#elif defined(TARGET_ARM64)
-            case VNF_HWI_Vector64_get_Zero:
-#endif
-            {
-                return GetSimdTypeOfVN(funcApp.m_args[0]);
-            }
-
-            default:
-            {
-                VNSimdTypeInfo vnInfo;
-                vnInfo.m_simdSize        = 0;
-                vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-                return vnInfo;
-            }
-        }
-    }
-#endif
-
-    VNSimdTypeInfo vnInfo;
-    vnInfo.m_simdSize        = 0;
-    vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-    return vnInfo;
-}
-#endif // FEATURE_SIMD
-
 bool ValueNumStore::IsVNInt32Constant(ValueNum vn)
 {
     if (!IsVNConstant(vn))
@@ -6464,17 +6462,34 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
 
 #ifdef FEATURE_SIMD
             case TYP_SIMD8:
+            {
+                simd8_t cnsVal = GetConstantSimd8(vn);
+                printf("Simd8Cns[0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1]);
+                break;
+            }
+
             case TYP_SIMD12:
+            {
+                simd12_t cnsVal = GetConstantSimd12(vn);
+                printf("Simd12Cns[0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2]);
+                break;
+            }
+
             case TYP_SIMD16:
+            {
+                simd16_t cnsVal = GetConstantSimd16(vn);
+                printf("Simd16Cns[0x%08x, 0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2],
+                       cnsVal.u32[3]);
+                break;
+            }
+
             case TYP_SIMD32:
             {
-                // Only the zero constant is currently allowed for SIMD types
-                //
-                INT64 val = ConstantValue<INT64>(vn);
-                assert(val == 0);
-                printf(" 0");
+                simd32_t cnsVal = GetConstantSimd32(vn);
+                printf("Simd32Cns[0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx]", cnsVal.u64[0], cnsVal.u64[1],
+                       cnsVal.u64[2], cnsVal.u64[3]);
+                break;
             }
-            break;
 #endif // FEATURE_SIMD
 
             // These should be unreached.
@@ -7944,6 +7959,16 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree)
                 tree->gtVNPair.SetBoth(
                     vnStore->VNForHandle(ssize_t(tree->AsIntConCommon()->IconValue()), tree->GetIconHandleFlag()));
             }
+#ifdef FEATURE_SIMD
+            else if (tree->IsCnsVec())
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                assert(varTypeIsLong(typ));
+
+                simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+                tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            }
+#endif // FEATURE_SIMD
             else if ((typ == TYP_LONG) || (typ == TYP_ULONG))
             {
                 tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->AsIntConCommon()->LngValue())));
@@ -7956,28 +7981,101 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree)
 
 #ifdef FEATURE_SIMD
         case TYP_SIMD8:
+        {
+            simd8_t simd8Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd8Val = {};
+            }
+            else
+            {
+                simd8Val = tree->AsVecCon()->gtSimd8Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            break;
+        }
+
         case TYP_SIMD12:
+        {
+            simd12_t simd12Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd12Val = {};
+            }
+            else
+            {
+                simd12Val = tree->AsVecCon()->gtSimd12Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd12Con(simd12Val));
+            break;
+        }
+
         case TYP_SIMD16:
+        {
+            simd16_t simd16Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd16Val = {};
+            }
+            else
+            {
+                simd16Val = tree->AsVecCon()->gtSimd16Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd16Con(simd16Val));
+            break;
+        }
+
         case TYP_SIMD32:
+        {
+            simd32_t simd32Val;
 
-#ifdef TARGET_64BIT
-            // Only the zero constant is currently allowed for SIMD types
-            //
-            assert(tree->AsIntConCommon()->LngValue() == 0);
-            tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue()));
-#else // 32BIT
-            assert(tree->AsIntConCommon()->IconValue() == 0);
-            tree->gtVNPair.SetBoth(vnStore->VNForIntCon(int(tree->AsIntConCommon()->IconValue())));
-#endif
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd32Val = {};
+            }
+            else
+            {
+                simd32Val = tree->AsVecCon()->gtSimd32Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(simd32Val));
             break;
+        }
 #endif // FEATURE_SIMD
 
         case TYP_FLOAT:
+        {
             tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal));
             break;
+        }
+
         case TYP_DOUBLE:
-            tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+        {
+#ifdef FEATURE_SIMD
+            if (tree->IsCnsVec())
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+                tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            }
+            else
+#endif // FEATURE_SIMD
+            {
+                tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+            }
             break;
+        }
+
         case TYP_REF:
             if (tree->AsIntConCommon()->IconValue() == 0)
             {
index 7340116..2275585 100644 (file)
@@ -347,6 +347,13 @@ private:
     double GetConstantDouble(ValueNum argVN);
     float GetConstantSingle(ValueNum argVN);
 
+#if defined(FEATURE_SIMD)
+    simd8_t GetConstantSimd8(ValueNum argVN);
+    simd12_t GetConstantSimd12(ValueNum argVN);
+    simd16_t GetConstantSimd16(ValueNum argVN);
+    simd32_t GetConstantSimd32(ValueNum argVN);
+#endif // FEATURE_SIMD
+
     // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
     // Assumes that "vnf" is a operator of the appropriate arity (unary for the first, binary for the second).
     // Assume that "CanEvalForConstantArgs(vnf)" is true.
@@ -419,6 +426,13 @@ public:
     ValueNum VNForDoubleCon(double cnsVal);
     ValueNum VNForByrefCon(target_size_t byrefVal);
 
+#if defined(FEATURE_SIMD)
+    ValueNum VNForSimd8Con(simd8_t cnsVal);
+    ValueNum VNForSimd12Con(simd12_t cnsVal);
+    ValueNum VNForSimd16Con(simd16_t cnsVal);
+    ValueNum VNForSimd32Con(simd32_t cnsVal);
+#endif // FEATURE_SIMD
+
 #ifdef TARGET_64BIT
     ValueNum VNForPtrSizeIntCon(INT64 cnsVal)
     {
@@ -748,14 +762,6 @@ public:
     // Returns true iff the VN represents a (non-handle) constant.
     bool IsVNConstant(ValueNum vn);
 
-    bool IsVNVectorZero(ValueNum vn);
-
-#ifdef FEATURE_SIMD
-    VNSimdTypeInfo GetSimdTypeOfVN(ValueNum vn);
-
-    VNSimdTypeInfo GetVectorZeroSimdTypeOfVN(ValueNum vn);
-#endif
-
     // Returns true iff the VN represents an integer constant.
     bool IsVNInt32Constant(ValueNum vn);
 
@@ -1390,6 +1396,133 @@ private:
         return m_byrefCnsMap;
     }
 
+#if defined(FEATURE_SIMD)
+    // Key functions (equality + hash) for maps keyed by simd8_t constants.
+    struct Simd8PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd8_t>
+    {
+        // Two keys match exactly when simd8_t's operator== reports them equal.
+        static bool Equals(simd8_t x, simd8_t y)
+        {
+            return x == y;
+        }
+
+        // Hashes the two 32-bit lanes of "val".
+        //
+        // The lanes are combined with an order-sensitive xor-then-multiply step (FNV-1a style)
+        // instead of a plain xor. A plain xor cancels equal lanes, so every broadcast constant
+        // (both lanes identical) would hash to 0, and swapping the lanes would never change the hash.
+        static unsigned GetHashCode(const simd8_t val)
+        {
+            unsigned hash = 2166136261u; // FNV-1a 32-bit offset basis
+
+            for (unsigned lane = 0; lane < 2; lane++)
+            {
+                // Unsigned multiplication wraps, which is the intended behavior here.
+                hash = static_cast<unsigned>(hash ^ val.u32[lane]) * 16777619u; // FNV-1a 32-bit prime
+            }
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd8_t, Simd8PrimitiveKeyFuncs> Simd8ToValueNumMap;
+    Simd8ToValueNumMap* m_simd8CnsMap;
+    // Returns the simd8 constant map, creating it with m_alloc the first time it is requested.
+    Simd8ToValueNumMap* GetSimd8CnsMap()
+    {
+        if (m_simd8CnsMap != nullptr)
+        {
+            // Already created by an earlier call.
+            return m_simd8CnsMap;
+        }
+
+        m_simd8CnsMap = new (m_alloc) Simd8ToValueNumMap(m_alloc);
+        return m_simd8CnsMap;
+    }
+
+    // Key functions (equality + hash) for maps keyed by simd12_t constants.
+    struct Simd12PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd12_t>
+    {
+        // Two keys match exactly when simd12_t's operator== reports them equal.
+        static bool Equals(simd12_t x, simd12_t y)
+        {
+            return x == y;
+        }
+
+        // Hashes the three 32-bit lanes of "val".
+        //
+        // The lanes are combined with an order-sensitive xor-then-multiply step (FNV-1a style)
+        // instead of a plain xor. A plain xor is insensitive to lane order and lets equal lanes
+        // cancel, so lane permutations collide and a broadcast collapses to the bare lane value.
+        static unsigned GetHashCode(const simd12_t val)
+        {
+            unsigned hash = 2166136261u; // FNV-1a 32-bit offset basis
+
+            for (unsigned lane = 0; lane < 3; lane++)
+            {
+                // Unsigned multiplication wraps, which is the intended behavior here.
+                hash = static_cast<unsigned>(hash ^ val.u32[lane]) * 16777619u; // FNV-1a 32-bit prime
+            }
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd12_t, Simd12PrimitiveKeyFuncs> Simd12ToValueNumMap;
+    Simd12ToValueNumMap* m_simd12CnsMap;
+    // Returns the simd12 constant map, creating it with m_alloc the first time it is requested.
+    Simd12ToValueNumMap* GetSimd12CnsMap()
+    {
+        if (m_simd12CnsMap != nullptr)
+        {
+            // Already created by an earlier call.
+            return m_simd12CnsMap;
+        }
+
+        m_simd12CnsMap = new (m_alloc) Simd12ToValueNumMap(m_alloc);
+        return m_simd12CnsMap;
+    }
+
+    // Key functions (equality + hash) for maps keyed by simd16_t constants.
+    struct Simd16PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd16_t>
+    {
+        // Two keys match exactly when simd16_t's operator== reports them equal.
+        static bool Equals(simd16_t x, simd16_t y)
+        {
+            return x == y;
+        }
+
+        // Hashes the four 32-bit lanes of "val".
+        //
+        // The lanes are combined with an order-sensitive xor-then-multiply step (FNV-1a style)
+        // instead of a plain xor. A plain xor cancels equal lanes, so every broadcast constant
+        // (all four lanes identical) would hash to 0, and lane permutations would always collide.
+        static unsigned GetHashCode(const simd16_t val)
+        {
+            unsigned hash = 2166136261u; // FNV-1a 32-bit offset basis
+
+            for (unsigned lane = 0; lane < 4; lane++)
+            {
+                // Unsigned multiplication wraps, which is the intended behavior here.
+                hash = static_cast<unsigned>(hash ^ val.u32[lane]) * 16777619u; // FNV-1a 32-bit prime
+            }
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd16_t, Simd16PrimitiveKeyFuncs> Simd16ToValueNumMap;
+    Simd16ToValueNumMap* m_simd16CnsMap;
+    // Returns the simd16 constant map, creating it with m_alloc the first time it is requested.
+    Simd16ToValueNumMap* GetSimd16CnsMap()
+    {
+        if (m_simd16CnsMap != nullptr)
+        {
+            // Already created by an earlier call.
+            return m_simd16CnsMap;
+        }
+
+        m_simd16CnsMap = new (m_alloc) Simd16ToValueNumMap(m_alloc);
+        return m_simd16CnsMap;
+    }
+
+    // Key functions (equality + hash) for maps keyed by simd32_t constants.
+    struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd32_t>
+    {
+        // Two keys match exactly when simd32_t's operator== reports them equal.
+        static bool Equals(simd32_t x, simd32_t y)
+        {
+            return x == y;
+        }
+
+        // Hashes the eight 32-bit lanes of "val".
+        //
+        // The lanes are combined with an order-sensitive xor-then-multiply step (FNV-1a style)
+        // instead of a plain xor. A plain xor cancels equal lanes, so every broadcast constant
+        // (all eight lanes identical) would hash to 0, and lane permutations would always collide.
+        static unsigned GetHashCode(const simd32_t val)
+        {
+            unsigned hash = 2166136261u; // FNV-1a 32-bit offset basis
+
+            for (unsigned lane = 0; lane < 8; lane++)
+            {
+                // Unsigned multiplication wraps, which is the intended behavior here.
+                hash = static_cast<unsigned>(hash ^ val.u32[lane]) * 16777619u; // FNV-1a 32-bit prime
+            }
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd32_t, Simd32PrimitiveKeyFuncs> Simd32ToValueNumMap;
+    Simd32ToValueNumMap* m_simd32CnsMap;
+    // Returns the simd32 constant map, creating it with m_alloc the first time it is requested.
+    Simd32ToValueNumMap* GetSimd32CnsMap()
+    {
+        if (m_simd32CnsMap != nullptr)
+        {
+            // Already created by an earlier call.
+            return m_simd32CnsMap;
+        }
+
+        m_simd32CnsMap = new (m_alloc) Simd32ToValueNumMap(m_alloc);
+        return m_simd32CnsMap;
+    }
+#endif // FEATURE_SIMD
+
     template <size_t NumArgs>
     struct VNDefFuncAppKeyFuncs : public JitKeyFuncsDefEquals<VNDefFuncApp<NumArgs>>
     {
@@ -1508,6 +1641,34 @@ struct ValueNumStore::VarTypConv<TYP_DOUBLE>
     typedef INT64  Type;
     typedef double Lang;
 };
+
+#if defined(FEATURE_SIMD)
+// TYP_SIMD8: both the Type and Lang aliases resolve to simd8_t.
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD8>
+{
+    using Type = simd8_t;
+    using Lang = simd8_t;
+};
+// TYP_SIMD12: both the Type and Lang aliases resolve to simd12_t.
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD12>
+{
+    using Type = simd12_t;
+    using Lang = simd12_t;
+};
+// TYP_SIMD16: both the Type and Lang aliases resolve to simd16_t.
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD16>
+{
+    using Type = simd16_t;
+    using Lang = simd16_t;
+};
+// TYP_SIMD32: both the Type and Lang aliases resolve to simd32_t.
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD32>
+{
+    using Type = simd32_t;
+    using Lang = simd32_t;
+};
+#endif // FEATURE_SIMD
+
 template <>
 struct ValueNumStore::VarTypConv<TYP_BYREF>
 {
@@ -1545,6 +1706,92 @@ FORCEINLINE T ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset)
     }
 }
 
+#if defined(FEATURE_SIMD)
+// Reads the simd8 constant stored at index "offset" of chunk "c"; the chunk must be typed TYP_SIMD8.
+template <>
+FORCEINLINE simd8_t ValueNumStore::SafeGetConstantValue<simd8_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD8);
+
+    // View the chunk's definition storage as an array of simd8 values.
+    VarTypConv<TYP_SIMD8>::Lang* const simdDefs = reinterpret_cast<VarTypConv<TYP_SIMD8>::Lang*>(c->m_defs);
+    return simdDefs[offset];
+}
+
+// Reads the simd12 constant stored at index "offset" of chunk "c"; the chunk must be typed TYP_SIMD12.
+template <>
+FORCEINLINE simd12_t ValueNumStore::SafeGetConstantValue<simd12_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD12);
+
+    // View the chunk's definition storage as an array of simd12 values.
+    VarTypConv<TYP_SIMD12>::Lang* const simdDefs = reinterpret_cast<VarTypConv<TYP_SIMD12>::Lang*>(c->m_defs);
+    return simdDefs[offset];
+}
+
+// Reads the simd16 constant stored at index "offset" of chunk "c"; the chunk must be typed TYP_SIMD16.
+template <>
+FORCEINLINE simd16_t ValueNumStore::SafeGetConstantValue<simd16_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD16);
+
+    // View the chunk's definition storage as an array of simd16 values.
+    VarTypConv<TYP_SIMD16>::Lang* const simdDefs = reinterpret_cast<VarTypConv<TYP_SIMD16>::Lang*>(c->m_defs);
+    return simdDefs[offset];
+}
+
+// Reads the simd32 constant stored at index "offset" of chunk "c"; the chunk must be typed TYP_SIMD32.
+template <>
+FORCEINLINE simd32_t ValueNumStore::SafeGetConstantValue<simd32_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD32);
+
+    // View the chunk's definition storage as an array of simd32 values.
+    VarTypConv<TYP_SIMD32>::Lang* const simdDefs = reinterpret_cast<VarTypConv<TYP_SIMD32>::Lang*>(c->m_defs);
+    return simdDefs[offset];
+}
+
+// Fetches the simd8 constant that "vn" denotes.
+// Asserts that vn's chunk holds constants of type TYP_SIMD8 and that no coercion was requested.
+template <>
+FORCEINLINE simd8_t ValueNumStore::ConstantValueInternal<simd8_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* const chunk = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(chunk->m_attribs == CEA_Const);
+
+    const unsigned indexInChunk = ChunkOffset(vn);
+
+    assert(chunk->m_typ == TYP_SIMD8);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd8_t>(chunk, indexInChunk);
+}
+
+// Fetches the simd12 constant that "vn" denotes.
+// Asserts that vn's chunk holds constants of type TYP_SIMD12 and that no coercion was requested.
+template <>
+FORCEINLINE simd12_t ValueNumStore::ConstantValueInternal<simd12_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* const chunk = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(chunk->m_attribs == CEA_Const);
+
+    const unsigned indexInChunk = ChunkOffset(vn);
+
+    assert(chunk->m_typ == TYP_SIMD12);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd12_t>(chunk, indexInChunk);
+}
+
+// Fetches the simd16 constant that "vn" denotes.
+// Asserts that vn's chunk holds constants of type TYP_SIMD16 and that no coercion was requested.
+template <>
+FORCEINLINE simd16_t ValueNumStore::ConstantValueInternal<simd16_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* const chunk = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(chunk->m_attribs == CEA_Const);
+
+    const unsigned indexInChunk = ChunkOffset(vn);
+
+    assert(chunk->m_typ == TYP_SIMD16);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd16_t>(chunk, indexInChunk);
+}
+
+// Fetches the simd32 constant that "vn" denotes.
+// Asserts that vn's chunk holds constants of type TYP_SIMD32 and that no coercion was requested.
+template <>
+FORCEINLINE simd32_t ValueNumStore::ConstantValueInternal<simd32_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* const chunk = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(chunk->m_attribs == CEA_Const);
+
+    const unsigned indexInChunk = ChunkOffset(vn);
+
+    assert(chunk->m_typ == TYP_SIMD32);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd32_t>(chunk, indexInChunk);
+}
+#endif // FEATURE_SIMD
+
 // Inline functions.
 
 // static