Merge pull request #16832 from dotnetrt/StoreNonTemporal
author Carol Eidt <carol.eidt@microsoft.com>
Wed, 14 Mar 2018 21:56:50 +0000 (14:56 -0700)
committer GitHub <noreply@github.com>
Wed, 14 Mar 2018 21:56:50 +0000 (14:56 -0700)
 Implement SSE2 StoreNonTemporal HW intrinsic - complete SSE2 ISA

src/jit/emitxarch.cpp
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h
tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj [new file with mode: 0644]

index 3f0b23d..4e1bec9 100644 (file)
@@ -312,11 +312,12 @@ bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
 bool emitter::TakesVexPrefix(instruction ins)
 {
     // special case vzeroupper as it requires 2-byte VEX prefix
-    // special case the fencing and the prefetch instructions as they never take a VEX prefix
+    // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
     switch (ins)
     {
         case INS_lfence:
         case INS_mfence:
+        case INS_movnti:
         case INS_prefetchnta:
         case INS_prefetcht0:
         case INS_prefetcht1:
@@ -418,13 +419,21 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
 
     if (IsSSEOrAVXInstruction(ins))
     {
-        if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
-            ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+        switch (ins)
         {
-            return true;
+            case INS_cvttsd2si:
+            case INS_cvttss2si:
+            case INS_cvtsd2si:
+            case INS_cvtss2si:
+            case INS_cvtsi2sd:
+            case INS_cvtsi2ss:
+            case INS_mov_xmm2i:
+            case INS_mov_i2xmm:
+            case INS_movnti:
+                return true;
+            default:
+                return false;
         }
-
-        return false;
     }
 
     // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
index 58fbde8..873f1c6 100644 (file)
@@ -1082,6 +1082,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
+        case NI_SSE2_StoreNonTemporal:
+        {
+            assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
+            assert(op1 != nullptr);
+            assert(op2 != nullptr);
+            // NOTE(review): op1Reg is presumably the base address and op2Reg the value stored at offset 0 -- confirm.
+            op2Reg          = op2->gtRegNum;
+            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+            emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
+            break;
+        }
+
         default:
             unreached();
             break;
index 88cfb97..c8c611f 100644 (file)
@@ -228,6 +228,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned,                                "StoreAlign
 HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                          SSE2,       -1,           16,          2,            {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_invalid,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreHigh,                                   "StoreHigh",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreLow,                                    "StoreLow",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movq,      INS_movq,      INS_invalid,   INS_movlpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreNonTemporal,                            "StoreNonTemporal",                                 SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movnti,    INS_movnti,    INS_movnti,    INS_movnti,    INS_invalid,   INS_invalid},           HW_Category_Scalar,                            HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit)
 HARDWARE_INTRINSIC(SSE2_StoreScalar,                                 "StoreScalar",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Subtract,                                    "Subtract",                                         SSE2,       -1,           16,          2,            {INS_psubb,     INS_psubb,     INS_psubw,     INS_psubw,     INS_psubd,     INS_psubd,     INS_psubq,     INS_psubq,     INS_invalid,   INS_subpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractSaturate,                            "SubtractSaturate",                                 SSE2,       -1,           16,          2,            {INS_psubsb,    INS_psubusb,   INS_psubsw,    INS_psubusw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
index fd228d7..56c7e99 100644 (file)
@@ -546,7 +546,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
 {
     switch (isa)
     {
-        case InstructionSet_SSE2:
         case InstructionSet_SSE42:
         case InstructionSet_AVX:
         case InstructionSet_AVX2:
@@ -558,6 +557,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
             return false;
 
         case InstructionSet_SSE:
+        case InstructionSet_SSE2:
         case InstructionSet_SSE3:
         case InstructionSet_SSSE3:
         case InstructionSet_SSE41:
@@ -1005,6 +1005,16 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_SSE2_StoreNonTemporal:
+        {
+            assert(sig->numArgs == 2);
+            assert(JITtype2varType(sig->retType) == TYP_VOID);
+            op2     = impPopStack().val; // operands are popped in reverse: op2 first...
+            op1     = impPopStack().val; // ...then op1
+            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
+            break;
+        }
+
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
index b5b88da..c0cd91d 100644 (file)
@@ -196,6 +196,7 @@ INST3( cvttsd2si,   "cvttsd2si"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE
 
 #ifndef LEGACY_BACKEND
 INST3( movntdq,     "movntdq"     , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
+INST3( movnti,      "movnti"      , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
 INST3( movntpd,     "movntpd"     , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
 INST3( movntps,     "movntps"     , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
 INST3( movdqu,      "movdqu"      , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs
new file mode 100644 (file)
index 0000000..76b468e
--- /dev/null
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+        // Returns Pass when every check succeeds (or when SSE2 is unsupported and nothing runs), otherwise Fail.
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+
+            if (Sse2.IsSupported)
+            {
+                if (Environment.Is64BitProcess)
+                {
+                    {
+                        long* inArray = stackalloc long[2];
+                        inArray[0] = 0xffffffff01L;
+                        long* outBuffer = stackalloc long[2];
+                        // NOTE(review): only element 0 is stored; the checks on the other elements assume stackalloc memory starts zeroed.
+                        Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+                        for (var i = 0; i < 2; i++)
+                        {
+                            if (inArray[i] != outBuffer[i])
+                            {
+                                Console.WriteLine("Sse2 StoreNonTemporal failed on long:");
+                                for (var n = 0; n < 2; n++)
+                                {
+                                    Console.Write(outBuffer[n] + ", ");
+                                }
+                                Console.WriteLine();
+
+                                testResult = Fail;
+                                break;
+                            }
+                        }
+                    }
+
+                    {
+                        ulong* inArray = stackalloc ulong[2];
+                        inArray[0] = 0xffffffffff01UL;
+                        ulong* outBuffer = stackalloc ulong[2];
+
+                        Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+                        for (var i = 0; i < 2; i++)
+                        {
+                            if (inArray[i] != outBuffer[i])
+                            {
+                                Console.WriteLine("Sse2 StoreNonTemporal failed on ulong:");
+                                for (var n = 0; n < 2; n++)
+                                {
+                                    Console.Write(outBuffer[n] + ", ");
+                                }
+                                Console.WriteLine();
+
+                                testResult = Fail;
+                                break;
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    try
+                    {
+                        long* inArray = stackalloc long[2];
+                        inArray[0] = 0xffffffff01L;
+                        long* outBuffer = stackalloc long[2];
+
+                        Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+                        testResult = Fail;
+                        Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on long: expected PlatformNotSupportedException exception.");
+                    }
+                    catch (PlatformNotSupportedException)
+                    {
+                        // Expected outcome: the long overload must throw in a 32-bit process.
+                    }
+                    catch(Exception ex)
+                    {
+                        testResult = Fail;
+                        Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on long: expected PlatformNotSupportedException exception.");
+                    }
+
+                    try
+                    {
+                        ulong* inArray = stackalloc ulong[2];
+                        inArray[0] = 0xffffffffff01UL;
+                        ulong* outBuffer = stackalloc ulong[2];
+
+                        Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+                        testResult = Fail;
+                        Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on ulong: expected PlatformNotSupportedException exception.");
+                    }
+                    catch (PlatformNotSupportedException)
+                    {
+                        // Expected outcome: the ulong overload must throw in a 32-bit process.
+                    }
+                    catch(Exception ex)
+                    {
+                        testResult = Fail;
+                        Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on ulong: expected PlatformNotSupportedException exception.");
+                    }
+                }
+                // The int/uint overloads are checked regardless of process bitness.
+                {
+                    int* inArray = stackalloc int[4];
+                    inArray[0] = -784561;
+                    int* outBuffer = stackalloc int[4];
+
+                    Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outBuffer[i])
+                        {
+                            Console.WriteLine("Sse2 StoreNonTemporal failed on int:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outBuffer[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    uint* inArray = stackalloc uint[4];
+                    inArray[0] = 0xffffff02u;
+                    uint* outBuffer = stackalloc uint[4];
+
+                    Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outBuffer[i])
+                        {
+                            Console.WriteLine("Sse2 StoreNonTemporal failed on uint:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outBuffer[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            return testResult;
+        }
+    }
+}
+
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj
new file mode 100644 (file)
index 0000000..8ca2a26
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize></Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="StoreNonTemporal.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj
new file mode 100644 (file)
index 0000000..4f00c2b
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="StoreNonTemporal.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file