From c3a86f1f0aad72317477def83a4cebacca0f6042 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 9 Dec 2015 11:35:07 -0800 Subject: [PATCH] ConsoleMandel jit benchmark Initial port of this benchmark. When run from the command line, with no args, it prints usage and exits with success. With args, one can select one or more of the renderers to benchmark or run. Under xunit-performance this runs all 24 permutations available. --- .../CodeQuality/SIMD/ConsoleMandel/Abstractions.cs | 137 +++++++++++ .../SIMD/ConsoleMandel/ConsoleMandel.cs | 255 +++++++++++++++++++++ .../SIMD/ConsoleMandel/ConsoleMandel.csproj | 54 +++++ .../CodeQuality/SIMD/ConsoleMandel/Interfaces.cs | 144 ++++++++++++ .../CodeQuality/SIMD/ConsoleMandel/ScalarDouble.cs | 157 +++++++++++++ .../CodeQuality/SIMD/ConsoleMandel/ScalarFloat.cs | 131 +++++++++++ .../CodeQuality/SIMD/ConsoleMandel/VectorDouble.cs | 211 +++++++++++++++++ .../SIMD/ConsoleMandel/VectorDoubleStrict.cs | 214 +++++++++++++++++ .../CodeQuality/SIMD/ConsoleMandel/VectorFloat.cs | 213 +++++++++++++++++ .../SIMD/ConsoleMandel/VectorFloatStrict.cs | 190 +++++++++++++++ .../SIMD/ConsoleMandel/VectorHelpers.cs | 37 +++ tests/src/JIT/config/benchmark/project.json | 4 + tests/src/JIT/config/benchmark/project.lock.json | 177 ++++++++++++++ 13 files changed, 1924 insertions(+) create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Abstractions.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.csproj create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Interfaces.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarDouble.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarFloat.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDouble.cs create mode 100644 
tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDoubleStrict.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloat.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloatStrict.cs create mode 100644 tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorHelpers.cs diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Abstractions.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Abstractions.cs new file mode 100644 index 0000000..f4156d5 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Abstractions.cs @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Algorithms +{ + // A float implementation of the BCL Complex type that only + // contains the bare essentials, plus a couple operations needed + // for efficient Mandelbrot calculation. 
+ internal struct ComplexFloat + { + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexFloat(float real, float imaginary) + { + Real = real; Imaginary = imaginary; + } + + public float Real; + public float Imaginary; + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexFloat square() + { + return new ComplexFloat(Real * Real - Imaginary * Imaginary, 2.0f * Real * Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public float sqabs() + { + return Real * Real + Imaginary * Imaginary; + } + + public override string ToString() + { + return String.Format("[{0} + {1}Imaginary]", Real, Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static ComplexFloat operator +(ComplexFloat a, ComplexFloat b) + { + return new ComplexFloat(a.Real + b.Real, a.Imaginary + b.Imaginary); + } + } + + // A couple extension methods that operate on BCL Complex types to help efficiently calculate + // the Mandelbrot set (They're instance methods on the ComplexFloat custom type) + public static partial class extensions + { + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static double sqabs(this Complex val) + { + return val.Real * val.Real + val.Imaginary * val.Imaginary; + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static Complex square(this Complex val) + { + return new Complex(val.Real * val.Real - val.Imaginary * val.Imaginary, 2.0 * val.Real * val.Imaginary); + } + } + + // This is an implementation of ComplexFloat that operates on Vector at a time SIMD types + internal struct ComplexVecFloat + { + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexVecFloat(Vector real, Vector imaginary) + { + Real = real; Imaginary = imaginary; + } + + public Vector Real; + public Vector Imaginary; + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexVecFloat square() + { + 
return new ComplexVecFloat(Real * Real - Imaginary * Imaginary, Real * Imaginary + Real * Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public Vector sqabs() + { + return Real * Real + Imaginary * Imaginary; + } + + public override string ToString() + { + return String.Format("[{0} + {1}Imaginary]", Real, Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static ComplexVecFloat operator +(ComplexVecFloat a, ComplexVecFloat b) + { + return new ComplexVecFloat(a.Real + b.Real, a.Imaginary + b.Imaginary); + } + } + + // This is an implementation of Complex that operates on Vector at a time SIMD types + internal struct ComplexVecDouble + { + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexVecDouble(Vector real, Vector imaginary) + { + Real = real; Imaginary = imaginary; + } + + public Vector Real; + public Vector Imaginary; + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public ComplexVecDouble square() + { + return new ComplexVecDouble(Real * Real - Imaginary * Imaginary, Real * Imaginary + Real * Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public Vector sqabs() + { + return Real * Real + Imaginary * Imaginary; + } + + public override string ToString() + { + return String.Format("[{0} + {1}Imaginary]", Real, Imaginary); + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static ComplexVecDouble operator +(ComplexVecDouble a, ComplexVecDouble b) + { + return new ComplexVecDouble(a.Real + b.Real, a.Imaginary + b.Imaginary); + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.cs new file mode 100644 index 0000000..7763b1c --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.cs @@ -0,0 +1,255 @@ +// Copyright (c) Microsoft. 
// All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//

using Microsoft.Xunit.Performance;
using System;
using System.Diagnostics;
using Xunit;

[assembly: OptimizeForBenchmarks]
[assembly: MeasureInstructionsRetired]

namespace ConsoleMandel
{
    // Command-line driver and xunit-performance entry points for the Mandelbrot renderers.
    //
    // A renderer is identified by an implementation number whose bits are decoded by the
    // IsVector / IsDouble / IsMulti / UsesADT / UseIntTypes helpers below.
    public static class Program
    {
        // Process exit codes returned by Main.
        private const int Pass = 100;
        private const int Fail = -1;

        // Number of renderer permutations; valid implementation numbers are 0 .. RendererCount - 1
        // (the usage text advertises this as [0-23]).
        private const int RendererCount = 24;

        // Iteration count used when -bench is not followed by a number.
        private const int DefaultBenchIterations = 5;

        // Set by -s: Bench prints only the per-renderer average.
        private static bool s_silent = false;

        // Pixel sink used when benchmarking: discards the result.
        private static void DoNothing(int x, int y, int count) { }

        // Pixel sink used for the text dump: starts a new line at x == 0 and prints '*'
        // for points whose iteration count reached 1000, ' ' otherwise.
        private static void DrawDot(int x, int y, int count)
        {
            if (x == 0)
            {
                Console.WriteLine();
            }
            Console.Write((count < 1000) ? ' ' : '*');
        }

        // Maps an implementation number onto the matching render delegate.
        private static Algorithms.FractalRenderer.Render GetRenderer(Action<int, int, int> draw, int which)
        {
            return Algorithms.FractalRenderer.SelectRender(draw, Abort, IsVector(which), IsDouble(which), IsMulti(which), UsesADT(which), !UseIntTypes(which));
        }

        // Abort callback handed to the renderers; this driver never aborts.
        private static bool Abort() { return false; }

        // Bit 3 clear => the renderer variant that uses int types.
        private static bool UseIntTypes(int num) { return (num & 8) == 0; }

        // Implementation numbers above 7 are the vector renderers.
        private static bool IsVector(int num) { return num > 7; }

        // Bit 2 => double precision.
        private static bool IsDouble(int num) { return (num & 4) != 0; }

        // Bit 1 => multi-threaded.
        private static bool IsMulti(int num) { return (num & 2) != 0; }

        // Bit 0 => uses the complex-number abstract data type.
        private static bool UsesADT(int num) { return (num & 1) != 0; }

        // Prints a one-line human readable description of implementation number i.
        private static void PrintDescription(int i)
        {
            Console.WriteLine("{0}: {1} {2}-Precision {3}Threaded using {4} and {5} int types", i,
                              IsVector(i) ? "Vector" : "Scalar",
                              IsDouble(i) ? "Double" : "Single",
                              IsMulti(i) ? "Multi" : "Single",
                              UsesADT(i) ? "ADT" : "Raw Values",
                              UseIntTypes(i) ? "using" : "not using any");
        }

        // Prints the command-line help followed by the description of every implementation.
        private static void PrintUsage()
        {
            Console.WriteLine("Usage:\n ConsoleMandel [0-23] -[bench #] where # is the number of iterations.");
            for (int i = 0; i < RendererCount; i++)
            {
                PrintDescription(i);
            }
            Console.WriteLine("The numeric argument selects the implementation number;");
            Console.WriteLine("If not specified, all are run.");
            Console.WriteLine("In non-benchmark mode, dump a text view of the Mandelbrot set.");
            Console.WriteLine("In benchmark mode, a larger set is computed but nothing is dumped.");
        }

        // Entry point.
        //   -bench [n]  benchmark mode with n iterations (DefaultBenchIterations when n is absent)
        //   -v          verbose
        //   -s          silent benchmark output
        //   <number>    implementation number (0 .. RendererCount - 1); -1 means "all"
        // Returns Pass on success (including the no-argument usage dump) and Fail on a bad
        // argument or on any exception.
        private static int Main(string[] args)
        {
            try
            {
                int which = -1;
                bool verbose = false;
                bool bench = false;
                int iters = 1;
                int argNum = 0;
                while (argNum < args.Length)
                {
                    string arg = args[argNum];
                    if (string.Equals(arg, "-BENCH", StringComparison.OrdinalIgnoreCase))
                    {
                        bench = true;
                        int parsed;
                        // Only consume the following argument when it really is the iteration
                        // count; otherwise it is an option in its own right (e.g. "-bench -s").
                        if ((argNum + 1 < args.Length) && Int32.TryParse(args[argNum + 1], out parsed))
                        {
                            iters = parsed;
                            argNum++;
                        }
                        else
                        {
                            iters = DefaultBenchIterations;
                        }
                    }
                    else if (string.Equals(arg, "-V", StringComparison.OrdinalIgnoreCase))
                    {
                        verbose = true;
                    }
                    else if (string.Equals(arg, "-S", StringComparison.OrdinalIgnoreCase))
                    {
                        s_silent = true;
                    }
                    else if (!Int32.TryParse(arg, out which))
                    {
                        PrintUsage();
                        return Fail;
                    }
                    argNum++;
                }

                // Reject values outside the documented ranges up front instead of failing
                // later with an array index error or printing meaningless statistics.
                if (which < -1 || which >= RendererCount || (bench && iters < 1))
                {
                    PrintUsage();
                    return Fail;
                }

                if (bench)
                {
                    Bench(iters, which);
                    return Pass;
                }
                if (which == -1)
                {
                    PrintUsage();
                    return Pass;
                }
                if (verbose)
                {
                    PrintDescription(which);
                    if (IsVector(which))
                    {
                        Console.WriteLine(" Vector Count is {0}", IsDouble(which) ? System.Numerics.Vector<double>.Count : System.Numerics.Vector<float>.Count);
                        Console.WriteLine(" {0} Accelerated.", System.Numerics.Vector.IsHardwareAccelerated ? "IS" : "IS NOT");
                    }
                }
                var render = GetRenderer(DrawDot, which);
                render(-1.5f, .5f, -1f, 1f, 2.0f / 60.0f);
                return Pass;
            }
            catch (Exception e)
            {
                // Keep the Fail exit code, but do not hide why the run failed.
                Console.Error.WriteLine(e);
                return Fail;
            }
        }

        // Runs the selected renderer (or all of them when which == -1) iters times over a
        // 2 * Range wide window and reports min / max / average time in milliseconds plus the
        // relative standard deviation. Each renderer is warmed up once on a coarser grid first.
        //
        // Throws ArgumentOutOfRangeException when iters < 1 or when which is neither -1 nor a
        // valid implementation number.
        public static void Bench(int iters, int which)
        {
            if (iters < 1)
            {
                throw new ArgumentOutOfRangeException("iters", "At least one iteration is required.");
            }
            if (which < -1 || which >= RendererCount)
            {
                throw new ArgumentOutOfRangeException("which", "Implementation number must be -1 (all) or 0-23.");
            }

            float XC = -1.248f;
            float YC = -.0362f;
            float Range = .001f;
            float xmin = XC - Range;
            float xmax = XC + Range;
            float ymin = YC - Range;
            float ymax = YC + Range;
            float step = Range / 1000f; // This will render one million pixels
            float warm = Range / 100f; // To warm up, just render 10000 pixels :-)
            Algorithms.FractalRenderer.Render[] renderers = new Algorithms.FractalRenderer.Render[RendererCount];
            // Warm up each renderer
            if (!s_silent)
            {
                Console.WriteLine("Warming up...");
            }
            Stopwatch timer = new Stopwatch();
            int firstRenderer = (which == -1) ? 0 : which;
            int lastRenderer = (which == -1) ? (renderers.Length - 1) : which;
            for (int i = firstRenderer; i <= lastRenderer; i++)
            {
                renderers[i] = GetRenderer(DoNothing, i);
                timer.Restart();
                renderers[i](xmin, xmax, ymin, ymax, warm);
                timer.Stop();
                if (!s_silent)
                {
                    Console.WriteLine("{0}{1}{2}{3}{4} Complete [{5} ms]",
                        UseIntTypes(i) ? "IntBV " : "Strict ",
                        IsVector(i) ? "Vector " : "Scalar ",
                        IsDouble(i) ? "Double " : "Single ",
                        UsesADT(i) ? "ADT " : "Raw ",
                        IsMulti(i) ? "Multi " : "Single ",
                        timer.ElapsedMilliseconds);
                }
            }
            if (!s_silent)
            {
                Console.WriteLine(" Run Type : Min Max Average Std-Dev");
            }
            long[] times = new long[iters];
            for (int i = firstRenderer; i <= lastRenderer; i++)
            {
                long totalTime = 0;
                long min = long.MaxValue;
                long max = long.MinValue;
                for (int count = 0; count < iters; count++)
                {
                    timer.Restart();
                    renderers[i](xmin, xmax, ymin, ymax, step);
                    timer.Stop();
                    long time = timer.ElapsedMilliseconds;
                    times[count] = time;
                    max = Math.Max(time, max);
                    min = Math.Min(time, min);
                    totalTime += time;
                }
                double avg = totalTime / (double)iters;
                double stdDev = RelativeStdDev(times, avg);
                if (s_silent)
                {
                    Console.WriteLine("Average: {0,0:0.0}", avg);
                }
                else
                {
                    Console.WriteLine("{0}{1}{2}{3}{4}: {5,8} {6,8} {7,10:0.0} {8,10:P}",
                        UseIntTypes(i) ? "IntBV " : "Strict ",
                        IsVector(i) ? "Vector " : "Scalar ",
                        IsDouble(i) ? "Double " : "Single ",
                        UsesADT(i) ? "ADT " : "Raw ",
                        IsMulti(i) ? "Multi " : "Single ",
                        min, max, avg, stdDev);
                }
            }
        }

        // Sample standard deviation of the timings expressed as a fraction of their mean
        // (the caller prints it with the "P" percent format). Returns 0 where the value is
        // undefined: fewer than two samples, or a mean that is not positive.
        private static double RelativeStdDev(long[] times, double mean)
        {
            if (times.Length < 2 || !(mean > 0.0))
            {
                return 0.0;
            }
            double sumOfSquares = 0.0;
            foreach (long time in times)
            {
                double delta = time - mean;
                sumOfSquares += delta * delta;
            }
            return Math.Sqrt(sumOfSquares / (times.Length - 1)) / mean;
        }

        // Body shared by the xunit-performance benchmarks: renders the benchmark window
        // iters times with renderer number which, discarding the pixels.
        public static void XBench(int iters, int which)
        {
            float XC = -1.248f;
            float YC = -.0362f;
            float Range = .001f;
            float xmin = XC - Range;
            float xmax = XC + Range;
            float ymin = YC - Range;
            float ymax = YC + Range;
            float step = Range / 100f;

            Algorithms.FractalRenderer.Render renderer = GetRenderer(DoNothing, which);

            for (int count = 0; count < iters; count++)
            {
                renderer(xmin, xmax, ymin, ymax, step);
            }
        }

        // Implementation 8: vector, single precision, single-threaded, raw values, no int types.
        [Benchmark]
        public static void VectorFloatSinglethreadRawNoInt()
        {
            foreach (var iteration in Benchmark.Iterations)
            {
                using (iteration.StartMeasurement())
                {
                    XBench(10, 8);
                }
            }
        }

        // Implementation 9: vector, single precision, single-threaded, ADT, no int types.
        [Benchmark]
        public static void VectorFloatSinglethreadADTNoInt()
        {
            foreach (var iteration in Benchmark.Iterations)
            {
                using (iteration.StartMeasurement())
                {
                    XBench(10, 9);
                }
            }
        }
    }
}
diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.csproj 
b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.csproj new file mode 100644 index 0000000..e661017 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ConsoleMandel.csproj @@ -0,0 +1,54 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + Properties + 512 + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + $(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages + ..\..\ + 7a9bfb7d + + + + + + pdbonly + true + + + + False + + + + + + + + + + + + + + + + + + + + + + $(JitPackagesConfigFileDirectory)benchmark\project.json + $(JitPackagesConfigFileDirectory)benchmark\project.lock.json + + + + + diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Interfaces.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Interfaces.cs new file mode 100644 index 0000000..65c66a3 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/Interfaces.cs @@ -0,0 +1,144 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
//

using System;

namespace Algorithms
{
    // Base class of the Mandelbrot renderers. It holds the two callbacks every renderer
    // needs (pixel sink and abort check) and provides SelectRender, which maps a set of
    // feature flags onto one concrete renderer method.
    public abstract class FractalRenderer
    {
        // Signature shared by every render method: the window to draw and the step between samples.
        public delegate void Render(float xmin, float xmax, float ymin, float ymax, float step);

        private Func<bool> _abort;
        private Action<int, int, int> _drawPixel;
        protected const int max_iters = 1000; // Make this higher to see more detail when zoomed in (and slow down rendering a lot)

        // draw is invoked for each computed pixel; checkAbort is evaluated through Abort.
        protected FractalRenderer(Action<int, int, int> draw, Func<bool> checkAbort)
        {
            _drawPixel = draw;
            _abort = checkAbort;
        }

        // Pixel sink supplied at construction.
        protected Action<int, int, int> DrawPixel { get { return _drawPixel; } }

        // Invokes the abort callback supplied at construction and returns its result.
        public bool Abort { get { return _abort(); } }

        // Creates the renderer selected by useVectorTypes / doublePrecision (and, for vector
        // renderers only, dontUseIntTypes) and returns the method of it selected by
        // isMultiThreaded / useAbstractDataType. Exactly one renderer is constructed per call.
        public static Render SelectRender(Action<int, int, int> draw, Func<bool> abort, bool useVectorTypes, bool doublePrecision, bool isMultiThreaded, bool useAbstractDataType, bool dontUseIntTypes = true)
        {
            if (useVectorTypes && doublePrecision)
            {
                if (dontUseIntTypes)
                {
                    var r = new VectorDoubleStrictRenderer(draw, abort);
                    return Choose(isMultiThreaded, useAbstractDataType,
                        r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                        r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
                }
                else // !dontUseIntTypes
                {
                    var r = new VectorDoubleRenderer(draw, abort);
                    return Choose(isMultiThreaded, useAbstractDataType,
                        r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                        r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
                }
            }
            else if (useVectorTypes && !doublePrecision)
            {
                if (dontUseIntTypes)
                {
                    var r = new VectorFloatStrictRenderer(draw, abort);
                    return Choose(isMultiThreaded, useAbstractDataType,
                        r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                        r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
                }
                else // !dontUseIntTypes
                {
                    var r = new VectorFloatRenderer(draw, abort);
                    return Choose(isMultiThreaded, useAbstractDataType,
                        r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                        r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
                }
            }
            else if (!useVectorTypes && doublePrecision)
            {
                // The scalar renderers have no "strict" variant, so dontUseIntTypes is not consulted.
                var r = new ScalarDoubleRenderer(draw, abort);
                return Choose(isMultiThreaded, useAbstractDataType,
                    r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                    r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
            }
            else // (!useVectorTypes && !doublePrecision)
            {
                var r = new ScalarFloatRenderer(draw, abort);
                return Choose(isMultiThreaded, useAbstractDataType,
                    r.RenderMultiThreadedWithADT, r.RenderMultiThreadedNoADT,
                    r.RenderSingleThreadedWithADT, r.RenderSingleThreadedNoADT);
            }
        }

        // Picks one of a renderer's four render methods from the threading / ADT flags.
        private static Render Choose(bool isMultiThreaded, bool useAbstractDataType,
            Render multiThreadedWithADT, Render multiThreadedNoADT,
            Render singleThreadedWithADT, Render singleThreadedNoADT)
        {
            if (isMultiThreaded)
            {
                return useAbstractDataType ? multiThreadedWithADT : multiThreadedNoADT;
            }
            return useAbstractDataType ? singleThreadedWithADT : singleThreadedNoADT;
        }
    }
}
diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarDouble.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarDouble.cs new file mode 100644 index 0000000..a49bc03 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarDouble.cs @@ -0,0 +1,157 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+// + +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This class contains renderers that use scalar doubles + internal class ScalarDoubleRenderer : FractalRenderer + { + public ScalarDoubleRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + protected const double limit = 4.0; + + // Render the fractal using the BCL Complex data type abstraction on a single thread with scalar doubles + public void RenderSingleThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + int yp = 0; + for (double y = ymin; y < ymax && !Abort; y += step, yp++) + { + int xp = 0; + for (double x = xmin; x < xmax; x += step, xp++) + { + Complex num = new Complex(x, y); + Complex accum = num; + int iters = 0; + double sqabs = 0f; + do + { + accum = accum.square(); + accum += num; + iters++; + sqabs = accum.sqabs(); + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + } + } + + // Render the fractal with no data type abstraction on a single thread with scalar doubles + public void RenderSingleThreadedNoADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + int yp = 0; + for (double y = ymin; y < ymax && !Abort; y += step, yp++) + { + int xp = 0; + for (double x = xmin; x < xmax; x += step, xp++) + { + double accumx = x; + double accumy = y; + int iters = 0; + double sqabs = 0.0; + do + { + double naccumx = accumx * accumx - accumy * accumy; + double naccumy = 2.0 * accumx * accumy; + accumx = naccumx + x; + accumy = naccumy + y; + iters++; + sqabs = accumx * accumx + accumy * accumy; + } while (sqabs < limit && iters < max_iters); + + 
DrawPixel(xp, yp, iters); + } + } + } + + // Render the fractal using the BCL Complex data type abstraction on multiple threads with scalar doubles + public void RenderMultiThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + double y = ymin + step * yp; + int xp = 0; + for (double x = xmin; x < xmax; x += step, xp++) + { + Complex num = new Complex(x, y); + Complex accum = num; + int iters = 0; + double sqabs = 0f; + do + { + accum = accum.square(); + accum += num; + iters++; + sqabs = accum.sqabs(); + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + }); + } + + // Render the fractal with no data type abstraction on multiple threads with scalar doubles + public void RenderMultiThreadedNoADT(float xmind, float xmaxd, float ymind, float ymaxd, float stepd) + { + double xmin = (double)xmind; + double xmax = (double)xmaxd; + double ymin = (double)ymind; + double ymax = (double)ymaxd; + double step = (double)stepd; + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + double y = ymin + step * yp; + int xp = 0; + for (double x = xmin; x < xmax; x += step, xp++) + { + double accumx = x; + double accumy = y; + int iters = 0; + double sqabs = 0.0; + do + { + double naccumx = accumx * accumx - accumy * accumy; + double naccumy = 2.0 * accumx * accumy; + accumx = naccumx + x; + accumy = naccumy + y; + iters++; + sqabs = accumx * accumx + accumy * accumy; + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + }); + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarFloat.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarFloat.cs new file mode 100644 
index 0000000..1efaebf --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/ScalarFloat.cs @@ -0,0 +1,131 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This class contains renderers that use scalar floats + internal class ScalarFloatRenderer : FractalRenderer + { + public ScalarFloatRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + protected const float limit = 4.0f; + + // Render the fractal using a Complex data type on a single thread with scalar floats + public void RenderSingleThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + int yp = 0; + for (float y = ymin; y < ymax && !Abort; y += step, yp++) + { + int xp = 0; + for (float x = xmin; x < xmax; x += step, xp++) + { + ComplexFloat num = new ComplexFloat(x, y); + ComplexFloat accum = num; + int iters = 0; + float sqabs = 0f; + do + { + accum = accum.square(); + accum += num; + iters++; + sqabs = accum.sqabs(); + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + } + } + + // Render the fractal with no data type abstraction on a single thread with scalar floats + public void RenderSingleThreadedNoADT(float xmin, float xmax, float ymin, float ymax, float step) + { + int yp = 0; + for (float y = ymin; y < ymax && !Abort; y += step, yp++) + { + int xp = 0; + for (float x = xmin; x < xmax; x += step, xp++) + { + float accumx = x; + float accumy = y; + int iters = 0; + float sqabs = 0f; + do + { + float naccumx = accumx * accumx - accumy * accumy; + float naccumy = 2.0f * accumx * accumy; + accumx = naccumx + x; + accumy = naccumy + y; + iters++; + sqabs = accumx * accumx + accumy * accumy; + } while (sqabs < limit && iters < max_iters); + DrawPixel(xp, yp, iters); + } + } + } + + // Render the fractal using a Complex 
data type on multiple threads with scalar floats + public void RenderMultiThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + float y = ymin + step * yp; + int xp = 0; + for (float x = xmin; x < xmax; x += step, xp++) + { + ComplexFloat num = new ComplexFloat(x, y); + ComplexFloat accum = num; + int iters = 0; + float sqabs = 0f; + do + { + accum = accum.square(); + accum += num; + iters++; + sqabs = accum.sqabs(); + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + }); + } + + // Render the fractal with no data type abstraction on multiple threads with scalar floats + public void RenderMultiThreadedNoADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + float y = ymin + step * yp; + int xp = 0; + for (float x = xmin; x < xmax; x += step, xp++) + { + float accumx = x; + float accumy = y; + int iters = 0; + float sqabs = 0f; + do + { + float naccumx = accumx * accumx - accumy * accumy; + float naccumy = 2.0f * accumx * accumy; + accumx = naccumx + x; + accumy = naccumy + y; + iters++; + sqabs = accumx * accumx + accumy * accumy; + } while (sqabs < limit && iters < max_iters); + + DrawPixel(xp, yp, iters); + } + }); + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDouble.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDouble.cs new file mode 100644 index 0000000..7eb40bc --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDouble.cs @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+// + +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This contains renderers that only use Vector's and Vector types. + internal class VectorDoubleRenderer : FractalRenderer + { + private const double limit = 4.0; + + private static Vector s_dummy = Vector.One; + + public VectorDoubleRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + // Render the fractal on a single thread using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedNoADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vlimit = new Vector(limit); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vstep = new Vector(step); + Vector vmax_iters = new Vector(max_iters); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + double y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + Vector vxmaxd = new Vector(xmax); + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmaxd); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + + // Render 
the fractal on a single thread using the ComplexVecDouble data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vlimit = new Vector(limit); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vstep = new Vector(step); + Vector vmax_iters = new Vector(max_iters); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + double y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecDouble num = new ComplexVecDouble(vx, vy); + ComplexVecDouble accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + + // Render the fractal on multiple threads using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderMultiThreadedNoADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vinc = new 
Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vstep = new Vector(step); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on multiple threads using the ComplexVecDouble data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderMultiThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vstep = new Vector(step); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += 
Vector.Count) + { + ComplexVecDouble num = new ComplexVecDouble(vx, vy); + ComplexVecDouble accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDoubleStrict.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDoubleStrict.cs new file mode 100644 index 0000000..d2d8068 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorDoubleStrict.cs @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This contains renderers that only use Vector's with no Vector types. It's + // primarily useful when targeting AVX (not AVX2), because AVX doesn't support 256 bits of + // integer values, only floating point values. 
+ internal class VectorDoubleStrictRenderer : FractalRenderer + { + private const double limit = 4.0; + + private static Vector s_dummy; + + static VectorDoubleStrictRenderer() + { + s_dummy = Vector.One; + } + + public VectorDoubleStrictRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + // Render the fractal on multiple threads using the ComplexVecDouble data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderMultiThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector((double)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecDouble num = new ComplexVecDouble(vx, vy); + ComplexVecDouble accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on multiple threads using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void 
RenderMultiThreadedNoADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector((double)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on a single thread using the ComplexVecDouble data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedWithADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector((double)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = 
new Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + double y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecDouble num = new ComplexVecDouble(vx, vy); + ComplexVecDouble accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + + // Render the fractal on a single thread using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedNoADT(float xminf, float xmaxf, float yminf, float ymaxf, float stepf) + { + double xmin = (double)xminf; + double xmax = (double)xmaxf; + double ymin = (double)yminf; + double ymax = (double)ymaxf; + double step = (double)stepf; + + Vector vmax_iters = new Vector((double)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((double)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + double y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * 
accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloat.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloat.cs new file mode 100644 index 0000000..5199fbf --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloat.cs @@ -0,0 +1,213 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This class contains renderers that use Vector (SIMD) floats + internal class VectorFloatRenderer : FractalRenderer + { + private const float limit = 4.0f; + + public VectorFloatRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + // Render the fractal on a single thread using the ComplexFloatVec data type + // This is the implementation that has the best comments. + public void RenderSingleThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + // Initialize a pile of method constant vectors + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vxmax = new Vector(xmax); + Vector vinc = new Vector((float)Vector.Count * step); + // Use my little helper routine: it's kind of slow, but I find it pleasantly readable. 
+ // The alternative would be this: + // float[] xmins = new float[Vector.Count]; + // for (int i = 0; i < xmins.Length; i++) + // xmins[i] = xmin + step * i; + // Vector vxmin = new Vector(xmins); + // Both allocate some memory, this one just does it in a separate routine :-) + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + float y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); + y <= ymax && !Abort; + vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; + Vector.LessThanOrEqualAny(vx, vxmax); // Vector.{comparison}Any|All return bools, not masks + vx += vinc, xp += Vector.Count) + { + ComplexVecFloat num = new ComplexVecFloat(vx, vy); + ComplexVecFloat accum = num; + + Vector viters = Vector.Zero; // Iteration counts start at all zeros + Vector increment = Vector.One; // Increment starts out as all ones + do + { + // This is work that can be vectorized + accum = accum.square() + num; + // Increment the iteration count. Only pixels that haven't already hit the + // limit will be incremented because the increment variable gets masked below + viters += increment; + // Create a mask that corresponds to the element-wise logical operation + // "accum <= limit && iters <= max_iters". Note that the bitwise and is used, + // because the Vector.{comparison} operations return masks, not boolean values + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + // increment becomes zero for the elems that have hit the limit because + // vCond is a mask of all zeros or ones, based on the results of the + // Vector.{comparison} operations + increment = increment & vCond; + // Keep going until we have no elements that haven't either hit the value + // limit or the iteration count + } while (increment != Vector.Zero); + + // This is another little helper I created. It's definitely kind of slow but I + // find it pleasantly succinct.
It could also be written like this: + // + // for (int eNum = 0; eNum < Vector.Count; eNum++) + // DrawPixel(xp + eNum, yp, viters[eNum]); + // + // Neither implementation is particularly fast, because pulling individual elements + // is a slow operation for vector types. + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, iter)); + } + } + } + + // Render the fractal on a single thread using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT + public void RenderSingleThreadedNoADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vxmax = new Vector(xmax); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + float y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + + // Render the fractal on multiple threads using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT + public void RenderMultiThreadedNoADT(float xmin, float xmax, float ymin, float ymax, 
float step) + { + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector XtimesY = accumx * accumy; + Vector naccumy = XtimesY + XtimesY; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on multiple threads using the ComplexFloatVec data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT + public void RenderMultiThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecFloat num = new 
ComplexVecFloat(vx, vy); + ComplexVecFloat accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, iter)); + } + }); + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloatStrict.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloatStrict.cs new file mode 100644 index 0000000..bb4ded4 --- /dev/null +++ b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorFloatStrict.cs @@ -0,0 +1,190 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Numerics; +using System.Threading.Tasks; + +namespace Algorithms +{ + // This contains renderers that only use Vector's with no Vector types. It was + // originally focused on targeting AVX (not AVX2), because AVX doesn't support 256 bits of + // integer values, only floating point values. 
+ internal class VectorFloatStrictRenderer : FractalRenderer + { + private const float limit = 4.0f; + + private static Vector s_dummy; + + static VectorFloatStrictRenderer() + { + s_dummy = Vector.One; + } + + public VectorFloatStrictRenderer(Action dp, Func abortFunc) + : base(dp, abortFunc) + { + } + + // Render the fractal on multiple threads using the ComplexFloatVec data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderMultiThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector((float)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecFloat num = new ComplexVecFloat(vx, vy); + ComplexVecFloat accum = num; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on multiple threads using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderMultiThreadedNoADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector((float)max_iters); + Vector vlimit = new Vector(limit); + Vector 
vstep = new Vector(step); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmax = new Vector(xmax); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + Parallel.For(0, (int)(((ymax - ymin) / step) + .5f), (yp) => + { + if (Abort) + return; + + Vector vy = new Vector(ymin + step * yp); + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + }); + } + + // Render the fractal on a single thread using the ComplexFloatVec data type + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedWithADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector((float)max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vxmax = new Vector(xmax); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + float y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + ComplexVecFloat num = new ComplexVecFloat(vx, vy); + ComplexVecFloat accum = num; + + Vector viters = Vector.Zero; + Vector increment = 
Vector.One; + do + { + accum = accum.square() + num; + viters += increment; + Vector vCond = Vector.LessThanOrEqual(accum.sqabs(), vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + + // Render the fractal on a single thread using raw Vector data types + // For a well commented version, go see VectorFloatRenderer.RenderSingleThreadedWithADT in VectorFloat.cs + public void RenderSingleThreadedNoADT(float xmin, float xmax, float ymin, float ymax, float step) + { + Vector vmax_iters = new Vector(max_iters); + Vector vlimit = new Vector(limit); + Vector vstep = new Vector(step); + Vector vxmax = new Vector(xmax); + Vector vinc = new Vector((float)Vector.Count * step); + Vector vxmin = VectorHelper.Create(i => xmin + step * i); + + float y = ymin; + int yp = 0; + for (Vector vy = new Vector(ymin); y <= ymax && !Abort; vy += vstep, y += step, yp++) + { + int xp = 0; + for (Vector vx = vxmin; Vector.LessThanOrEqualAny(vx, vxmax); vx += vinc, xp += Vector.Count) + { + Vector accumx = vx; + Vector accumy = vy; + + Vector viters = Vector.Zero; + Vector increment = Vector.One; + do + { + Vector naccumx = accumx * accumx - accumy * accumy; + Vector naccumy = accumx * accumy + accumx * accumy; + accumx = naccumx + vx; + accumy = naccumy + vy; + viters += increment; + Vector sqabs = accumx * accumx + accumy * accumy; + Vector vCond = Vector.LessThanOrEqual(sqabs, vlimit) & + Vector.LessThanOrEqual(viters, vmax_iters); + increment = increment & vCond; + } while (increment != Vector.Zero); + + viters.ForEach((iter, elemNum) => DrawPixel(xp + elemNum, yp, (int)iter)); + } + } + } + } +} diff --git a/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorHelpers.cs b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorHelpers.cs new file mode 100644 index 0000000..f424b6f --- /dev/null +++ 
b/tests/src/JIT/Performance/CodeQuality/SIMD/ConsoleMandel/VectorHelpers.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Algorithms +{ + public static class VectorHelper + { + // Helper to construct a vector from a lambda that takes an + // index. It's not efficient, but it's more succinct than the + // corresponding for loop. Don't use it on a hot code path + // (i.e. inside a loop) + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static Vector Create(Func creator) where T : struct + { + T[] data = new T[Vector.Count]; + for (int i = 0; i < data.Length; i++) + data[i] = creator(i); + return new Vector(data); + } + + // Helper to invoke a function for each element of the + // vector. This is NOT fast. I just like the way it looks + // better than a for loop.
Don't use it somewhere that + // performance truly matters + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + public static void ForEach(this Vector vec, Action op) where T : struct + { + for (int i = 0; i < Vector.Count; i++) + op(vec[i], i); + } + } +} diff --git a/tests/src/JIT/config/benchmark/project.json b/tests/src/JIT/config/benchmark/project.json index 0224f89..577d88e 100644 --- a/tests/src/JIT/config/benchmark/project.json +++ b/tests/src/JIT/config/benchmark/project.json @@ -6,8 +6,12 @@ "System.Console": "4.0.0-beta-*", "System.Runtime": "4.0.20-beta-*", "System.Runtime.Extensions": "4.0.10-beta-*", + "System.Runtime.Numerics": "4.0.1-beta-*", "System.Numerics.Vectors": "4.1.1-beta-*", "System.Numerics.Vectors.WindowsRuntime": "4.0.1-beta-*", + "System.Threading": "4.0.10", + "System.Threading.Tasks": "4.0.10", + "System.Threading.Tasks.Parallel": "4.0.0", "xunit": "2.1.0", "xunit.console.netcore": "1.0.2-prerelease-00128", "xunit.runner.utility": "2.1.0" diff --git a/tests/src/JIT/config/benchmark/project.lock.json b/tests/src/JIT/config/benchmark/project.lock.json index 9e3a670..c86ebad 100644 --- a/tests/src/JIT/config/benchmark/project.lock.json +++ b/tests/src/JIT/config/benchmark/project.lock.json @@ -347,6 +347,21 @@ "lib/DNXCore50/System.Runtime.InteropServices.dll": {} } }, + "System.Runtime.Numerics/4.0.1-beta-23516": { + "type": "package", + "dependencies": { + "System.Globalization": "4.0.10", + "System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10" + }, + "compile": { + "ref/dotnet5.2/System.Runtime.Numerics.dll": {} + }, + "runtime": { + "lib/dotnet5.4/System.Runtime.Numerics.dll": {} + } + }, "System.Runtime.WindowsRuntime/4.0.0": { "type": "package", "dependencies": { @@ -438,6 +453,25 @@ "lib/DNXCore50/System.Threading.Tasks.dll": {} } }, + "System.Threading.Tasks.Parallel/4.0.0": { + "type": "package", + "dependencies": { + "System.Collections.Concurrent": 
"4.0.10", + "System.Diagnostics.Debug": "4.0.10", + "System.Diagnostics.Tracing": "4.0.20", + "System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10", + "System.Threading": "4.0.10", + "System.Threading.Tasks": "4.0.10" + }, + "compile": { + "ref/dotnet/System.Threading.Tasks.Parallel.dll": {} + }, + "runtime": { + "lib/dotnet/System.Threading.Tasks.Parallel.dll": {} + } + }, "System.Xml.ReaderWriter/4.0.10": { "type": "package", "dependencies": { @@ -1000,6 +1034,21 @@ "lib/DNXCore50/System.Runtime.InteropServices.dll": {} } }, + "System.Runtime.Numerics/4.0.1-beta-23516": { + "type": "package", + "dependencies": { + "System.Globalization": "4.0.10", + "System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10" + }, + "compile": { + "ref/dotnet5.2/System.Runtime.Numerics.dll": {} + }, + "runtime": { + "lib/dotnet5.4/System.Runtime.Numerics.dll": {} + } + }, "System.Runtime.WindowsRuntime/4.0.0": { "type": "package", "dependencies": { @@ -1091,6 +1140,25 @@ "lib/DNXCore50/System.Threading.Tasks.dll": {} } }, + "System.Threading.Tasks.Parallel/4.0.0": { + "type": "package", + "dependencies": { + "System.Collections.Concurrent": "4.0.10", + "System.Diagnostics.Debug": "4.0.10", + "System.Diagnostics.Tracing": "4.0.20", + "System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10", + "System.Threading": "4.0.10", + "System.Threading.Tasks": "4.0.10" + }, + "compile": { + "ref/dotnet/System.Threading.Tasks.Parallel.dll": {} + }, + "runtime": { + "lib/dotnet/System.Threading.Tasks.Parallel.dll": {} + } + }, "System.Xml.ReaderWriter/4.0.10": { "type": "package", "dependencies": { @@ -1656,6 +1724,21 @@ "lib/DNXCore50/System.Runtime.InteropServices.dll": {} } }, + "System.Runtime.Numerics/4.0.1-beta-23516": { + "type": "package", + "dependencies": { + "System.Globalization": "4.0.10", + 
"System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10" + }, + "compile": { + "ref/dotnet5.2/System.Runtime.Numerics.dll": {} + }, + "runtime": { + "lib/dotnet5.4/System.Runtime.Numerics.dll": {} + } + }, "System.Runtime.WindowsRuntime/4.0.0": { "type": "package", "dependencies": { @@ -1747,6 +1830,25 @@ "lib/DNXCore50/System.Threading.Tasks.dll": {} } }, + "System.Threading.Tasks.Parallel/4.0.0": { + "type": "package", + "dependencies": { + "System.Collections.Concurrent": "4.0.10", + "System.Diagnostics.Debug": "4.0.10", + "System.Diagnostics.Tracing": "4.0.20", + "System.Resources.ResourceManager": "4.0.0", + "System.Runtime": "4.0.20", + "System.Runtime.Extensions": "4.0.10", + "System.Threading": "4.0.10", + "System.Threading.Tasks": "4.0.10" + }, + "compile": { + "ref/dotnet/System.Threading.Tasks.Parallel.dll": {} + }, + "runtime": { + "lib/dotnet/System.Threading.Tasks.Parallel.dll": {} + } + }, "System.Xml.ReaderWriter/4.0.10": { "type": "package", "dependencies": { @@ -2775,6 +2877,46 @@ "System.Runtime.InteropServices.nuspec" ] }, + "System.Runtime.Numerics/4.0.1-beta-23516": { + "type": "package", + "serviceable": true, + "sha512": "CiUlA1qetxoQgHBhr/5PwTSqHZ6g5YgwToRwCk1I8AjPC+8MjwnODZV/4X4AGSfTwbu742OvlHEvB7S7UAed+A==", + "files": [ + "lib/dotnet5.4/System.Runtime.Numerics.dll", + "lib/net45/_._", + "lib/netcore50/System.Runtime.Numerics.dll", + "lib/win8/_._", + "lib/wpa81/_._", + "ref/dotnet5.2/de/System.Runtime.Numerics.xml", + "ref/dotnet5.2/es/System.Runtime.Numerics.xml", + "ref/dotnet5.2/fr/System.Runtime.Numerics.xml", + "ref/dotnet5.2/it/System.Runtime.Numerics.xml", + "ref/dotnet5.2/ja/System.Runtime.Numerics.xml", + "ref/dotnet5.2/ko/System.Runtime.Numerics.xml", + "ref/dotnet5.2/ru/System.Runtime.Numerics.xml", + "ref/dotnet5.2/System.Runtime.Numerics.dll", + "ref/dotnet5.2/System.Runtime.Numerics.xml", + "ref/dotnet5.2/zh-hans/System.Runtime.Numerics.xml", + 
"ref/dotnet5.2/zh-hant/System.Runtime.Numerics.xml", + "ref/net45/_._", + "ref/netcore50/de/System.Runtime.Numerics.xml", + "ref/netcore50/es/System.Runtime.Numerics.xml", + "ref/netcore50/fr/System.Runtime.Numerics.xml", + "ref/netcore50/it/System.Runtime.Numerics.xml", + "ref/netcore50/ja/System.Runtime.Numerics.xml", + "ref/netcore50/ko/System.Runtime.Numerics.xml", + "ref/netcore50/ru/System.Runtime.Numerics.xml", + "ref/netcore50/System.Runtime.Numerics.dll", + "ref/netcore50/System.Runtime.Numerics.xml", + "ref/netcore50/zh-hans/System.Runtime.Numerics.xml", + "ref/netcore50/zh-hant/System.Runtime.Numerics.xml", + "ref/win8/_._", + "ref/wpa81/_._", + "System.Runtime.Numerics.4.0.1-beta-23516.nupkg", + "System.Runtime.Numerics.4.0.1-beta-23516.nupkg.sha512", + "System.Runtime.Numerics.nuspec" + ] + }, "System.Runtime.WindowsRuntime/4.0.0": { "type": "package", "sha512": "IvSI0X1wIgQ2yFCXnV0EJc1FFE4xxzSPqX1r6ikhcLPuKmXjBglB0IrJBmWAK8vaPkyjBIwf7ks2VSdFazXwhA==", @@ -3004,6 +3146,37 @@ "System.Threading.Tasks.nuspec" ] }, + "System.Threading.Tasks.Parallel/4.0.0": { + "type": "package", + "serviceable": true, + "sha512": "GXDhjPhF3nE4RtDia0W6JR4UMdmhOyt9ibHmsNV6GLRT4HAGqU636Teo4tqvVQOFp2R6b1ffxPXiRaoqtzGxuA==", + "files": [ + "lib/dotnet/System.Threading.Tasks.Parallel.dll", + "lib/net45/_._", + "lib/netcore50/System.Threading.Tasks.Parallel.dll", + "lib/win8/_._", + "lib/wpa81/_._", + "ref/dotnet/de/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/es/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/fr/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/it/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/ja/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/ko/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/ru/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/System.Threading.Tasks.Parallel.dll", + "ref/dotnet/System.Threading.Tasks.Parallel.xml", + "ref/dotnet/zh-hans/System.Threading.Tasks.Parallel.xml", + 
"ref/dotnet/zh-hant/System.Threading.Tasks.Parallel.xml", + "ref/net45/_._", + "ref/netcore50/System.Threading.Tasks.Parallel.dll", + "ref/netcore50/System.Threading.Tasks.Parallel.xml", + "ref/win8/_._", + "ref/wpa81/_._", + "System.Threading.Tasks.Parallel.4.0.0.nupkg", + "System.Threading.Tasks.Parallel.4.0.0.nupkg.sha512", + "System.Threading.Tasks.Parallel.nuspec" + ] + }, "System.Xml.ReaderWriter/4.0.10": { "type": "package", "serviceable": true, @@ -3247,8 +3420,12 @@ "System.Console >= 4.0.0-beta-*", "System.Runtime >= 4.0.20-beta-*", "System.Runtime.Extensions >= 4.0.10-beta-*", + "System.Runtime.Numerics >= 4.0.1-beta-*", "System.Numerics.Vectors >= 4.1.1-beta-*", "System.Numerics.Vectors.WindowsRuntime >= 4.0.1-beta-*", + "System.Threading >= 4.0.10", + "System.Threading.Tasks >= 4.0.10", + "System.Threading.Tasks.Parallel >= 4.0.0", "xunit >= 2.1.0", "xunit.console.netcore >= 1.0.2-prerelease-00128", "xunit.runner.utility >= 2.1.0" -- 2.7.4