HARDWARE_INTRINSIC(AVX2_Add, "Add", AVX2, -1, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2_Multiply, "Multiply", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2_BlendVariable, "BlendVariable", AVX2, -1, 32, 3, {INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal, "LoadAlignedVector256NonTemporal", AVX2, -1, 32, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) // One-operand memory load: the first eight instruction slots all select movntdqa and the last two are INS_invalid. NOTE(review): the slots presumably follow the base-type order (integer types first, then float/double) - confirm against the HARDWARE_INTRINSIC macro definition.
// AES Intrinsics
HARDWARE_INTRINSIC(AES_IsSupported, "get_IsSupported", AES, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag) // Property entry: no instruction is associated with any slot.
INST3( pblendvb, "pblendvb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x10)) // Variable Blend Packed Bytes
INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer
INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint
+INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values; only the third encoding column is populated, the first two are BAD_CODE
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) // Carries no encodings at all. NOTE(review): presumably an end-of-range marker, so new SSE rows must stay above this line - confirm.
// VEX encoding supports unaligned memory ops, so we can fold them
case NI_SSE_LoadVector128:
case NI_SSE2_LoadVector128:
+ case NI_AVX_LoadVector256:
+ case NI_AVX_LoadAlignedVector256:
isContainable = (containingCategory == HW_Category_SimpleSIMD) && comp->canUseVexEncoding();
break;
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    // Functional test for Avx2.LoadAlignedVector256NonTemporal.
+    // For every integer element type it loads 32 bytes from a 32-byte aligned
+    // source buffer, stores the resulting vector to a second buffer and checks
+    // that both buffers hold the same elements.
+    class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        // Runs the per-type checks and returns Pass (100) when every check succeeds
+        // or when Avx2 is not supported on the current machine; returns Fail (0) as
+        // soon as any element type round-trips incorrectly.
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+
+            if (Avx2.IsSupported)
+            {
+                // Every section below over-allocates 64 bytes so that a 32-byte aligned
+                // window of 32 bytes always fits, then seeds that window with distinct
+                // non-zero values. Without the seeding both buffers would just hold
+                // whatever stackalloc returned (unspecified, typically zero) and a load
+                // that produced nothing useful could still compare equal.
+
+                // int: 8 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    int* inArray = (int*)Align(inBuffer, 32);
+                    int* outArray = stackalloc int[8];
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        inArray[i] = i + 1;
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on int:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // long: 4 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    long* inArray = (long*)Align(inBuffer, 32);
+                    long* outArray = stackalloc long[4];
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        inArray[i] = i + 1;
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on long:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // uint: 8 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    uint* inArray = (uint*)Align(inBuffer, 32);
+                    uint* outArray = stackalloc uint[8];
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        inArray[i] = (uint)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on uint:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // ulong: 4 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    ulong* inArray = (ulong*)Align(inBuffer, 32);
+                    ulong* outArray = stackalloc ulong[4];
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        inArray[i] = (ulong)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on ulong:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // short: 16 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    short* inArray = (short*)Align(inBuffer, 32);
+                    short* outArray = stackalloc short[16];
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        inArray[i] = (short)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on short:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // ushort: 16 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    ushort* inArray = (ushort*)Align(inBuffer, 32);
+                    ushort* outArray = stackalloc ushort[16];
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        inArray[i] = (ushort)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on ushort:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // sbyte: 32 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    sbyte* inArray = (sbyte*)Align(inBuffer, 32);
+                    sbyte* outArray = stackalloc sbyte[32];
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        inArray[i] = (sbyte)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on sbyte:");
+                            for (var n = 0; n < 32; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                // byte: 32 lanes
+                {
+                    byte* inBuffer = stackalloc byte[64];
+                    byte* inArray = (byte*)Align(inBuffer, 32);
+                    byte* outArray = stackalloc byte[32];
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        inArray[i] = (byte)(i + 1);
+                    }
+
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on byte:");
+                            for (var n = 0; n < 32; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        // Returns the first address at or after 'buffer' that is a multiple of
+        // 'expectedAlignment'. The caller must over-allocate by at least
+        // (expectedAlignment - 1) bytes so the aligned window stays inside the buffer.
+        static unsafe void* Align(byte* buffer, byte expectedAlignment)
+        {
+            // How far 'buffer' sits past the previous aligned address: 0 .. expectedAlignment - 1.
+            var offset = (ulong)(buffer) % expectedAlignment;
+
+            // Bytes needed to reach the next aligned address. The trailing modulo maps
+            // the already-aligned case (offset == 0) to a padding of 0 instead of a
+            // full 'expectedAlignment', so an aligned buffer is returned unchanged.
+            var padding = (expectedAlignment - offset) % expectedAlignment;
+
+            return (void*)(buffer + padding);
+        }
+    }
+}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project that compiles LoadAlignedVector256NonTemporal.cs into an executable; this variant leaves Optimize unset. -->
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- Imports dir.props from the nearest ancestor directory that contains it. -->
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): the other project file added in this change uses the same GUID; confirm whether it needs to be unique. -->
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- Permits 'unsafe' code in the compiled source. -->
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize> <!-- Deliberately empty here. NOTE(review): presumably the effective value then comes from the imported props/targets; confirm. -->
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="LoadAlignedVector256NonTemporal.cs" /> <!-- The only source file compiled by this project. -->
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- Imports dir.targets from the nearest ancestor directory that contains it. -->
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project that compiles LoadAlignedVector256NonTemporal.cs into an executable; this variant sets Optimize to True. -->
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- Imports dir.props from the nearest ancestor directory that contains it. -->
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): the other project file added in this change uses the same GUID; confirm whether it needs to be unique. -->
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- Permits 'unsafe' code in the compiled source. -->
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize> <!-- Compiler optimizations are explicitly enabled for this variant. -->
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="LoadAlignedVector256NonTemporal.cs" /> <!-- The only source file compiled by this project. -->
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- Imports dir.targets from the nearest ancestor directory that contains it. -->
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>