// Each row lists: the intrinsic id, the managed method name as a string, the ISA token,
// three numeric fields, a ten-slot instruction table, a HW_Category_* value and a HW_Flag_* value.
// NOTE(review): the ten instruction slots appear to be indexed by element type in the order
// byte, ubyte, short, ushort, int, uint, long, ulong, float, double (BlendVariable places
// blendvps/blendvpd in the last two slots, CompareEqual places pcmpeqq in slots 7 and 8);
// confirm against the HARDWARE_INTRINSIC macro definition, which is not visible here.
HARDWARE_INTRINSIC(SSE41_IsSupported, "get_IsSupported", SSE41, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVariable", SSE41, -1, 16, 3, {INS_pblendvb, INS_pblendvb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41_CompareEqual, "CompareEqual", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// LoadAlignedVector128NonTemporal maps the first eight slots to movntdqa; the last two slots have no instruction.
+HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal, "LoadAlignedVector128NonTemporal", SSE41, -1, 16, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41_Multiply, "Multiply", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// SSE42 Intrinsics
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program
+ {
+ const int Pass = 100;
+ const int Fail = 0;
+
+        /// <summary>
+        /// Exercises Sse41.LoadAlignedVector128NonTemporal for every integer element type:
+        /// loads 16 bytes from a 16-byte-aligned stack buffer, stores the vector back to a
+        /// second buffer and compares the two element by element.
+        /// </summary>
+        /// <param name="args">Unused.</param>
+        /// <returns>
+        /// Pass (100) when every comparison succeeds or when SSE4.1 is not supported;
+        /// Fail (0) when any loaded element differs from the source memory.
+        /// </returns>
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+
+            if (Sse41.IsSupported)
+            {
+                // Each case over-allocates 32 bytes so that a 16-byte-aligned window of 16 bytes
+                // is always available, then fills that window with a known non-zero pattern.
+                // Without the fill the source would be whatever stackalloc returned (typically
+                // all zeros), and a load that produced zeros would pass unnoticed.
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    int* inArray = (int*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    int* outArray = stackalloc int[4];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on int:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    long* inArray = (long*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    long* outArray = stackalloc long[2];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 2; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on long:");
+                            for (var n = 0; n < 2; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    uint* inArray = (uint*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    uint* outArray = stackalloc uint[4];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on uint:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    ulong* inArray = (ulong*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    ulong* outArray = stackalloc ulong[2];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 2; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on ulong:");
+                            for (var n = 0; n < 2; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    short* inArray = (short*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    short* outArray = stackalloc short[8];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on short:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    ushort* inArray = (ushort*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    ushort* outArray = stackalloc ushort[8];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on ushort:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    sbyte* inArray = (sbyte*)Align(inBuffer, 16);
+                    FillPattern((byte*)inArray, 16);
+                    sbyte* outArray = stackalloc sbyte[16];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on sbyte:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                {
+                    byte* inBuffer = stackalloc byte[32];
+                    byte* inArray = (byte*)Align(inBuffer, 16);
+                    FillPattern(inArray, 16);
+                    byte* outArray = stackalloc byte[16];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on byte:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        /// <summary>
+        /// Writes the byte values 1, 2, 3, ... into <paramref name="destination"/> so that every
+        /// byte of the source vector is non-zero and distinct from its neighbours.
+        /// </summary>
+        /// <param name="destination">Start of the memory to fill.</param>
+        /// <param name="byteCount">Number of bytes to write.</param>
+        static unsafe void FillPattern(byte* destination, int byteCount)
+        {
+            for (var i = 0; i < byteCount; i++)
+            {
+                destination[i] = (byte)(i + 1);
+            }
+        }
+
+        /// <summary>
+        /// Rounds <paramref name="buffer"/> up to the nearest address that is a multiple of
+        /// <paramref name="expectedAlignment"/>.
+        /// </summary>
+        /// <param name="buffer">
+        /// Start of an allocation that has at least (expectedAlignment - 1) spare bytes beyond
+        /// the size the caller intends to use at the aligned address.
+        /// </param>
+        /// <param name="expectedAlignment">Required alignment in bytes; must be non-zero.</param>
+        /// <returns>
+        /// The first address at or after <paramref name="buffer"/> that satisfies the alignment.
+        /// An already-aligned <paramref name="buffer"/> is returned unchanged.
+        /// </returns>
+        static unsafe void* Align(byte* buffer, byte expectedAlignment)
+        {
+            // Distance past the previous aligned address, in the range [0, expectedAlignment - 1].
+            var remainder = (ulong)(buffer) % expectedAlignment;
+
+            // Padding needed to reach the next aligned address. The outer modulo maps the
+            // already-aligned case (remainder == 0) to zero padding rather than a full
+            // expectedAlignment bytes, so the padding is at most (expectedAlignment - 1).
+            var padding = (expectedAlignment - remainder) % expectedAlignment;
+            return (void*)(buffer + padding);
+        }
+ }
+}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize> <!-- Optimize is left empty in this project; the other project file that compiles the same LoadAlignedVector128NonTemporal.cs sets it to True. NOTE(review): presumably this is the unoptimized variant of the test; confirm. -->
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="LoadAlignedVector128NonTemporal.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize> <!-- Optimize is True in this project; the other project file that compiles the same LoadAlignedVector128NonTemporal.cs leaves it empty. NOTE(review): presumably this is the optimized variant of the test; confirm. -->
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="LoadAlignedVector128NonTemporal.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>