Moving CreateScalarUnsafe, ToScalar, Vector128.ToVector256Unsafe, and Vector256.GetLower to be intrinsics
author Tanner Gooding <tagoo@outlook.com>
Fri, 7 Dec 2018 01:15:03 +0000 (17:15 -0800)
committer GitHub <noreply@github.com>
Fri, 7 Dec 2018 01:15:03 +0000 (17:15 -0800)
* Moving CreateScalarUnsafe, ToScalar, Vector128.ToVector256Unsafe, and Vector256.GetLower to be intrinsics

* Adding containment support to the helper intrinsics

Commit migrated from https://github.com/dotnet/coreclr/commit/1c18b3290b825e66e973e147eda8c7cca3e539c6

src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/lsraxarch.cpp
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs

index df5578f..9c9cf82 100644 (file)
@@ -3411,7 +3411,10 @@ protected:
     NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method);
 
 #ifdef FEATURE_HW_INTRINSICS
-    GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
+    GenTree* impBaseIntrinsic(NamedIntrinsic        intrinsic,
+                              CORINFO_CLASS_HANDLE  clsHnd,
+                              CORINFO_METHOD_HANDLE method,
+                              CORINFO_SIG_INFO*     sig);
     GenTree* impHWIntrinsic(NamedIntrinsic        intrinsic,
                             CORINFO_METHOD_HANDLE method,
                             CORINFO_SIG_INFO*     sig,
index 320c9fb..8f2bc3a 100644 (file)
@@ -1254,29 +1254,111 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
     var_types      targetType  = node->TypeGet();
     var_types      baseType    = node->gtSIMDBaseType;
 
-    assert(node->gtGetOp1() == nullptr);
+    assert(compiler->compSupports(InstructionSet_SSE));
+    assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE));
+
+    GenTree*  op1    = node->gtGetOp1();
+    regNumber op1Reg = REG_NA;
+
+    if (op1 != nullptr)
+    {
+        assert(!op1->OperIsList());
+        op1Reg = op1->gtRegNum;
+        genConsumeOperands(node);
+    }
+
     assert(node->gtGetOp2() == nullptr);
-    assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
 
-    emitter* emit = getEmitter();
-    emitAttr attr = EA_ATTR(node->gtSIMDSize);
+    emitter*    emit = getEmitter();
+    emitAttr    attr = EA_ATTR(node->gtSIMDSize);
+    instruction ins  = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
 
     switch (intrinsicId)
     {
-        case NI_Base_Vector128_Zero:
+        case NI_Base_Vector128_CreateScalarUnsafe:
+        case NI_Base_Vector256_CreateScalarUnsafe:
         {
-            // When SSE2 is supported, we generate pxor for integral types otherwise just use xorps
-            instruction ins =
-                (compiler->compSupports(InstructionSet_SSE2) && varTypeIsIntegral(baseType)) ? INS_pxor : INS_xorps;
-            emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
+            if (varTypeIsIntegral(baseType))
+            {
+                genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType));
+            }
+            else
+            {
+                assert(varTypeIsFloating(baseType));
+
+                attr = emitTypeSize(baseType);
+
+                if (op1->isContained() || op1->isUsedFromSpillTemp())
+                {
+                    genHWIntrinsic_R_RM(node, ins, attr);
+                }
+                else if (targetReg != op1Reg)
+                {
+                    // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
+                    emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+                }
+            }
             break;
         }
 
+        case NI_Base_Vector128_ToScalar:
+        case NI_Base_Vector256_ToScalar:
+        {
+            assert(varTypeIsFloating(baseType));
+
+            attr = emitTypeSize(TYP_SIMD16);
+
+            if (op1->isContained() || op1->isUsedFromSpillTemp())
+            {
+                genHWIntrinsic_R_RM(node, ins, attr);
+            }
+            else if (targetReg != op1Reg)
+            {
+                // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
+                emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+            }
+            break;
+        }
+
+        case NI_Base_Vector128_ToVector256:
+        {
+            // ToVector256 has zero-extend semantics in order to ensure it is deterministic
+            // We always emit a move to the target register, even when op1Reg == targetReg,
+            // in order to ensure that Bits MAXVL-1:128 are zeroed.
+
+            attr = emitTypeSize(TYP_SIMD16);
+
+            if (op1->isContained() || op1->isUsedFromSpillTemp())
+            {
+                genHWIntrinsic_R_RM(node, ins, attr);
+            }
+            else
+            {
+                // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
+                emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+            }
+            break;
+        }
+
+        case NI_Base_Vector128_ToVector256Unsafe:
+        case NI_Base_Vector256_GetLower:
+        {
+            if (op1->isContained() || op1->isUsedFromSpillTemp())
+            {
+                genHWIntrinsic_R_RM(node, ins, attr);
+            }
+            else if (targetReg != op1Reg)
+            {
+                // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
+                emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+            }
+            break;
+        }
+
+        case NI_Base_Vector128_Zero:
         case NI_Base_Vector256_Zero:
         {
-            // When AVX2 is supported, we generate pxor for integral types otherwise just use xorps
-            instruction ins =
-                (compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType)) ? INS_pxor : INS_xorps;
+            assert(op1 == nullptr);
             emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
             break;
         }
index 75de24b..68cfda5 100644 (file)
@@ -38,7 +38,11 @@ HARDWARE_INTRINSIC(Base_Vector128_AsSingle,                         "AsSingle",
 HARDWARE_INTRINSIC(Base_Vector128_AsUInt16,                         "AsUInt16",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_AsUInt32,                         "AsUInt32",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_AsUInt64,                         "AsUInt64",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Base_Vector128_Zero,                             "get_Zero",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe,               "CreateScalarUnsafe",                          Base,          -1,              16,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_ToScalar,                         "ToScalar",                                    Base,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_ToVector256,                      "ToVector256",                                 Base,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe,                "ToVector256Unsafe",                           Base,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_Zero,                             "get_Zero",                                    Base,          -1,              16,           0,     {INS_xorps,             INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps},             HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_As,                               "As`1",                                        Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_AsByte,                           "AsByte",                                      Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_AsDouble,                         "AsDouble",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
@@ -50,7 +54,10 @@ HARDWARE_INTRINSIC(Base_Vector256_AsSingle,                         "AsSingle",
 HARDWARE_INTRINSIC(Base_Vector256_AsUInt16,                         "AsUInt16",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_AsUInt32,                         "AsUInt32",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_AsUInt64,                         "AsUInt64",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Base_Vector256_Zero,                             "get_Zero",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe,               "CreateScalarUnsafe",                          Base,          -1,              32,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector256_GetLower,                         "GetLower",                                    Base,          -1,              32,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector256_ToScalar,                         "ToScalar",                                    Base,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector256_Zero,                             "get_Zero",                                    Base,          -1,              32,           0,     {INS_xorps,             INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps},             HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
index 0ddf3b3..96badba 100644 (file)
@@ -3448,6 +3448,10 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 case NI_Base_Vector128_AsUInt32:
                 case NI_Base_Vector128_AsUInt64:
 #if defined(_TARGET_XARCH_)
+                case NI_Base_Vector128_CreateScalarUnsafe:
+                case NI_Base_Vector128_ToScalar:
+                case NI_Base_Vector128_ToVector256:
+                case NI_Base_Vector128_ToVector256Unsafe:
                 case NI_Base_Vector128_Zero:
                 case NI_Base_Vector256_As:
                 case NI_Base_Vector256_AsByte:
@@ -3460,10 +3464,13 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 case NI_Base_Vector256_AsUInt16:
                 case NI_Base_Vector256_AsUInt32:
                 case NI_Base_Vector256_AsUInt64:
+                case NI_Base_Vector256_CreateScalarUnsafe:
+                case NI_Base_Vector256_GetLower:
+                case NI_Base_Vector256_ToScalar:
                 case NI_Base_Vector256_Zero:
 #endif // _TARGET_XARCH_
                 {
-                    return impBaseIntrinsic(ni, method, sig);
+                    return impBaseIntrinsic(ni, clsHnd, method, sig);
                 }
 
                 default:
@@ -4101,15 +4108,20 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 //
 // Arguments:
 //    intrinsic  -- id of the intrinsic function.
+//    clsHnd     -- handle for the intrinsic method's class
 //    method     -- method handle of the intrinsic function.
 //    sig        -- signature of the intrinsic call
 //
 // Return Value:
 //    the expanded intrinsic.
 //
-GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
+GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
+                                    CORINFO_CLASS_HANDLE  clsHnd,
+                                    CORINFO_METHOD_HANDLE method,
+                                    CORINFO_SIG_INFO*     sig)
 {
     GenTree* retNode = nullptr;
+    GenTree* op1     = nullptr;
 
     if (!featureSIMD)
     {
@@ -4117,19 +4129,26 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
     }
 
     unsigned  simdSize = 0;
-    var_types baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
-    var_types retType  = getSIMDTypeForSize(simdSize);
+    var_types baseType = TYP_UNKNOWN;
+    var_types retType  = JITtype2varType(sig->retType);
 
     if (sig->hasThis())
     {
-        CORINFO_CLASS_HANDLE thisClass = info.compCompHnd->getArgClass(sig, sig->args);
-        var_types            thisType  = getBaseTypeOfSIMDType(thisClass);
+        baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
 
-        if (!varTypeIsArithmetic(thisType))
+        if (retType == TYP_STRUCT)
         {
-            return nullptr;
+            unsigned retSimdSize = 0;
+            getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize);
+            retType = getSIMDTypeForSize(retSimdSize);
         }
     }
+    else
+    {
+        assert(retType == TYP_STRUCT);
+        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
+        retType  = getSIMDTypeForSize(simdSize);
+    }
 
     if (!varTypeIsArithmetic(baseType))
     {
@@ -4186,6 +4205,56 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
         }
 
 #ifdef _TARGET_XARCH_
+        case NI_Base_Vector128_CreateScalarUnsafe:
+        {
+            assert(sig->numArgs == 1);
+
+#ifdef _TARGET_X86_
+            if (varTypeIsLong(baseType))
+            {
+                // TODO-XARCH-CQ: It may be beneficial to emit the movq
+                // instruction, which takes a 64-bit memory address and
+                // works on 32-bit x86 systems.
+                break;
+            }
+#endif // _TARGET_X86_
+
+            if (compSupports(InstructionSet_SSE2) || (compSupports(InstructionSet_SSE) && (baseType == TYP_FLOAT)))
+            {
+                op1     = impPopStack().val;
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
+            }
+            break;
+        }
+
+        case NI_Base_Vector128_ToScalar:
+        {
+            assert(sig->numArgs == 0);
+            assert(sig->hasThis());
+
+            if (compSupports(InstructionSet_SSE) && varTypeIsFloating(baseType))
+            {
+                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 16);
+            }
+            break;
+        }
+
+        case NI_Base_Vector128_ToVector256:
+        case NI_Base_Vector128_ToVector256Unsafe:
+        case NI_Base_Vector256_GetLower:
+        {
+            assert(sig->numArgs == 0);
+            assert(sig->hasThis());
+
+            if (compSupports(InstructionSet_AVX))
+            {
+                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
+            }
+            break;
+        }
+
         case NI_Base_Vector128_Zero:
         {
             assert(sig->numArgs == 0);
@@ -4197,6 +4266,41 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
             break;
         }
 
+        case NI_Base_Vector256_CreateScalarUnsafe:
+        {
+            assert(sig->numArgs == 1);
+
+#ifdef _TARGET_X86_
+            if (varTypeIsLong(baseType))
+            {
+                // TODO-XARCH-CQ: It may be beneficial to emit the movq
+                // instruction, which takes a 64-bit memory address and
+                // works on 32-bit x86 systems.
+                break;
+            }
+#endif // _TARGET_X86_
+
+            if (compSupports(InstructionSet_AVX))
+            {
+                op1     = impPopStack().val;
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
+            }
+            break;
+        }
+
+        case NI_Base_Vector256_ToScalar:
+        {
+            assert(sig->numArgs == 0);
+            assert(sig->hasThis());
+
+            if (compSupports(InstructionSet_AVX) && varTypeIsFloating(baseType))
+            {
+                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 32);
+            }
+            break;
+        }
+
         case NI_Base_Vector256_Zero:
         {
             assert(sig->numArgs == 0);
@@ -4419,7 +4523,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                 {
                     className += 3;
 
-                    if (strcmp(className, "`1") == 0)
+#if defined(_TARGET_XARCH_)
+                    if (className[0] == '\0')
+                    {
+                        if (strcmp(methodName, "CreateScalarUnsafe") == 0)
+                        {
+                            result = NI_Base_Vector128_CreateScalarUnsafe;
+                        }
+                    }
+                    else
+#endif // _TARGET_XARCH_
+                        if (strcmp(className, "`1") == 0)
                     {
                         if (strncmp(methodName, "As", 2) == 0)
                         {
@@ -4475,6 +4589,28 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_Base_Vector128_Zero;
                         }
+                        else if (strncmp(methodName, "To", 2) == 0)
+                        {
+                            methodName += 2;
+
+                            if (strcmp(methodName, "Scalar") == 0)
+                            {
+                                result = NI_Base_Vector128_ToScalar;
+                            }
+                            else if (strncmp(methodName, "Vector256", 9) == 0)
+                            {
+                                methodName += 9;
+
+                                if (methodName[0] == '\0')
+                                {
+                                    result = NI_Base_Vector128_ToVector256;
+                                }
+                                else if (strcmp(methodName, "Unsafe") == 0)
+                                {
+                                    result = NI_Base_Vector128_ToVector256Unsafe;
+                                }
+                            }
+                        }
 #endif // _TARGET_XARCH_
                     }
                 }
@@ -4483,7 +4619,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                 {
                     className += 3;
 
-                    if (strcmp(className, "`1") == 0)
+                    if (className[0] == '\0')
+                    {
+                        if (strcmp(methodName, "CreateScalarUnsafe") == 0)
+                        {
+                            result = NI_Base_Vector256_CreateScalarUnsafe;
+                        }
+                    }
+                    else if (strcmp(className, "`1") == 0)
                     {
                         if (strncmp(methodName, "As", 2) == 0)
                         {
@@ -4538,6 +4681,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_Base_Vector256_Zero;
                         }
+                        else if (strcmp(methodName, "GetLower") == 0)
+                        {
+                            result = NI_Base_Vector256_GetLower;
+                        }
+                        else if (strcmp(methodName, "ToScalar") == 0)
+                        {
+                            result = NI_Base_Vector256_ToScalar;
+                        }
                     }
                 }
 #endif // _TARGET_XARCH_
index c5cc71e..774334c 100644 (file)
@@ -2372,6 +2372,60 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
         // must be handled within the case.
         switch (intrinsicId)
         {
+            case NI_Base_Vector128_CreateScalarUnsafe:
+            case NI_Base_Vector128_ToScalar:
+            case NI_Base_Vector256_CreateScalarUnsafe:
+            case NI_Base_Vector256_ToScalar:
+            {
+                assert(numArgs == 1);
+
+                if (varTypeIsFloating(baseType))
+                {
+                    if (op1->isContained())
+                    {
+                        srcCount += BuildOperandUses(op1);
+                    }
+                    else
+                    {
+                        // We will either be in memory and need to be moved
+                        // into a register of the appropriate size or we
+                        // are already in an XMM/YMM register and can stay
+                        // where we are.
+
+                        tgtPrefUse = BuildUse(op1);
+                        srcCount += 1;
+                    }
+
+                    buildUses = false;
+                }
+                break;
+            }
+
+            case NI_Base_Vector128_ToVector256:
+            case NI_Base_Vector128_ToVector256Unsafe:
+            case NI_Base_Vector256_GetLower:
+            {
+                assert(numArgs == 1);
+
+                if (op1->isContained())
+                {
+                    srcCount += BuildOperandUses(op1);
+                }
+                else
+                {
+                    // We will either be in memory and need to be moved
+                    // into a register of the appropriate size or we
+                    // are already in an XMM/YMM register and can stay
+                    // where we are.
+
+                    tgtPrefUse = BuildUse(op1);
+                    srcCount += 1;
+                }
+
+                buildUses = false;
+                break;
+            }
+
             case NI_SSE_CompareEqualOrderedScalar:
             case NI_SSE_CompareEqualUnorderedScalar:
             case NI_SSE_CompareNotEqualOrderedScalar:
index 41f069f..445ccb9 100644 (file)
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Runtime.CompilerServices;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
@@ -707,6 +708,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<byte> CreateScalarUnsafe(byte value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -720,6 +722,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Double}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<double> CreateScalarUnsafe(double value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -733,6 +736,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Int16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<short> CreateScalarUnsafe(short value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -746,6 +750,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Int32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<int> CreateScalarUnsafe(int value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -759,6 +764,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Int64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<long> CreateScalarUnsafe(long value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -772,6 +778,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{SByte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector128<sbyte> CreateScalarUnsafe(sbyte value)
         {
@@ -786,6 +793,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Single}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector128<float> CreateScalarUnsafe(float value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -799,6 +807,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector128<ushort> CreateScalarUnsafe(ushort value)
         {
@@ -813,6 +822,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector128<uint> CreateScalarUnsafe(uint value)
         {
@@ -827,6 +837,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector128<ulong> CreateScalarUnsafe(ulong value)
         {
index 95f217d..3c15de6 100644 (file)
@@ -303,6 +303,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary>
         /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public T ToScalar()
         {
             ThrowIfUnsupportedType();
@@ -356,6 +357,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Converts the current instance to a new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits initialized to zero.</summary>
         /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits initialized to zero.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public Vector256<T> ToVector256()
         {
             ThrowIfUnsupportedType();
@@ -369,6 +371,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Converts the current instance to a new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits left uninitialized.</summary>
         /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits left uninitialized.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public unsafe Vector256<T> ToVector256Unsafe()
         {
             ThrowIfUnsupportedType();
index 6863a86..d22e1c0 100644 (file)
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Runtime.CompilerServices;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
@@ -905,6 +906,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<byte> CreateScalarUnsafe(byte value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -918,6 +920,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<double> CreateScalarUnsafe(double value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -931,6 +934,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<short> CreateScalarUnsafe(short value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -944,6 +948,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<int> CreateScalarUnsafe(int value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -957,6 +962,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<long> CreateScalarUnsafe(long value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -970,6 +976,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector256<sbyte> CreateScalarUnsafe(sbyte value)
         {
@@ -984,6 +991,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         public static unsafe Vector256<float> CreateScalarUnsafe(float value)
         {
             // This relies on us stripping the "init" flag from the ".locals"
@@ -997,6 +1005,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector256<ushort> CreateScalarUnsafe(ushort value)
         {
@@ -1011,6 +1020,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector256<uint> CreateScalarUnsafe(uint value)
         {
@@ -1025,6 +1035,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
         [CLSCompliant(false)]
         public static unsafe Vector256<ulong> CreateScalarUnsafe(ulong value)
         {
index fe0fc21..dd18a4c 100644 (file)
@@ -254,6 +254,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Gets the value of the lower 128-bits as a new <see cref="Vector128{T}" />.</summary>
         /// <returns>The value of the lower 128-bits as a new <see cref="Vector128{T}" />.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public Vector128<T> GetLower()
         {
             ThrowIfUnsupportedType();
@@ -305,6 +306,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary>
         /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public T ToScalar()
         {
             ThrowIfUnsupportedType();