1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 // C# adaptation of C implementation of Livermore Loops Fortran benchmark.
7 /* Livermore Loops coded in C Latest File Modification 20 Oct 92,
8 * by Tim Peters, Kendall Square Res. Corp. tim@ksr.com, ksr!tim@uunet.uu.net
9 * SUBROUTINE KERNEL( TK) replaces the Fortran routine in LFK Test program.
10 ************************************************************************
12 * KERNEL executes 24 samples of "C" computation *
14 * TK(1) - total cpu time to execute only the 24 kernels.*
15 * TK(2) - total Flops executed by the 24 Kernels *
17 ************************************************************************
19 * L. L. N. L. " C " K E R N E L S: M F L O P S *
21 * These kernels measure " C " numerical computation *
22 * rates for a spectrum of cpu-limited computational *
23 * structures or benchmarks. Mathematical through-put *
24 * is measured in units of millions of floating-point *
25 * operations executed per second, called Megaflops/sec. *
27 * Fonzi's Law: There is not now and there never will be a language *
28 * in which it is the least bit difficult to write *
31 ************************************************************************
32 *Originally from Greg Astfalk, AT&T, P.O.Box 900, Princeton, NJ. 08540*
33 * by way of Frank McMahon (LLNL). *
37 * F.H.McMahon, The Livermore Fortran Kernels: *
38 * A Computer Test Of The Numerical Performance Range, *
39 * Lawrence Livermore National Laboratory, *
40 * Livermore, California, UCRL-53745, December 1986. *
42 * from: National Technical Information Service *
43 * U.S. Department of Commerce *
44 * 5285 Port Royal Road *
45 * Springfield, VA. 22161 *
47 * Changes made to correct many array subscripting problems, *
48 * make more readable (added #define's), include the original *
49 * FORTRAN versions of the runs as comments, and make more *
50 * portable by Kelly O'Hair (LLNL) and Chuck Rasbold (LLNL). *
52 ************************************************************************
55 using Microsoft.Xunit.Performance;
57 using System.Runtime.CompilerServices;
60 [assembly: OptimizeForBenchmarks]
62 namespace Benchstone.BenchF
67 public const int Iterations = 1;
69 public const int Iterations = 4000;
72 private const double MaxErr = 1.0e-6;
74 private double[] _x = new double[1002];
75 private double[] _y = new double[1002];
76 private double[] _z = new double[1002];
77 private double[] _u = new double[501];
78 private double[][] _px;
79 private double[][] _cx;
80 private double[][][] _u1;
81 private double[][][] _u2;
82 private double[][][] _u3;
83 private double[][] _b;
84 private double[] _bnk1 = new double[6];
85 private double[][] _c;
86 private double[] _bnk2 = new double[6];
87 private double[][] _p;
88 private double[] _bnk3 = new double[6];
89 private double[][] _h;
90 private double[] _bnk4 = new double[6];
91 private double[] _bnk5 = new double[6];
92 private double[] _ex = new double[68];
93 private double[] _rh = new double[68];
94 private double[] _dex = new double[68];
95 private double[] _vx = new double[151];
96 private double[] _xx = new double[151];
97 private double[] _grd = new double[151];
98 private int[] _e = new int[193];
99 private int[] _f = new int[193];
100 private int[] _nrops = { 0, 5, 10, 2, 2, 2, 2, 16, 36, 17, 9, 1, 1, 7, 11 };
101 private int[] _loops = { 0, 400, 200, 1000, 510, 1000, 1000, 120, 40, 100, 100, 1000, 1000, 128, 150 };
102 private double[] _checks = {
103 0, 0.811986948148e+07, 0.356310000000e+03, 0.356310000000e+03, -0.402412007078e+05,
104 0.136579037764e+06, 0.419716278716e+06,
105 0.429449847526e+07, 0.314064400000e+06,
106 0.182709000000e+07, -0.140415250000e+09,
107 0.374895020500e+09, 0.000000000000e+00,
108 0.171449024000e+06, -0.510829560800e+07
111 public static volatile object VolatileObject;
113 [MethodImpl(MethodImplOptions.NoInlining)]
114 private static void Escape(object obj)
116 VolatileObject = obj;
119 private static T[][] AllocArray<T>(int n1, int n2)
121 T[][] a = new T[n1][];
122 for (int i = 0; i < n1; ++i)
129 private static T[][][] AllocArray<T>(int n1, int n2, int n3)
131 T[][][] a = new T[n1][][];
132 for (int i = 0; i < n1; ++i)
135 for (int j = 0; j < n2; j++)
144 [MethodImpl(MethodImplOptions.NoInlining)]
147 _px = AllocArray<double>(16, 101);
148 _cx = AllocArray<double>(16, 101);
150 _u1 = AllocArray<double>(6, 23, 3);
151 _u2 = AllocArray<double>(6, 23, 3);
152 _u3 = AllocArray<double>(6, 23, 3);
154 _b = AllocArray<double>(65, 9);
155 _c = AllocArray<double>(65, 9);
156 _h = AllocArray<double>(65, 9);
158 _p = AllocArray<double>(5, 513);
160 for (int i = 0; i < Iterations; i++)
162 Main1(i < Iterations - 1 ? 0 : 1);
168 private static int Clock()
173 private void Main1(int output)
175 int nt, lw, nl1, nl2;
176 int i, i1, i2, ip, ir, ix, j, j1, j2, k, kx, ky, l, m;
177 double[] ts = new double[21];
178 double[] rt = new double[21];
179 double[] rpm = new double[21];
180 double[] cksum = new double[21];
181 double r, t, a11, a12, a13, sig, a21, a22, a23, a31, a32, a33;
182 double b28, b27, b26, b25, b24, b23, b22, c0, flx, rx1;
183 double q, s, scale, uu, du1, du2, du3, ar, br, cr, xi, ri;
184 int[] mops = new int[20];
186 for (i = 1; i <= 20; i++)
215 * end of initialization -- begin timing
218 /* loop 1 hydro excerpt */
221 ts[1] = (double)Clock();
223 for (k = 1; k <= 400; k++)
225 _x[k] = q + _y[k] * (r * _z[k + 10] + t * _z[k + 11]);
227 ts[1] = (double)Clock() - ts[1];
228 for (k = 1; k <= 400; k++)
230 cksum[1] += (double)k * _x[k];
233 /* loop 2 mlr, inner product */
236 ts[2] = (double)Clock();
238 for (k = 1; k <= 996; k += 5)
240 q += _z[k] * _x[k] + _z[k + 1] * _x[k + 1] + _z[k + 2] * _x[k + 2] + _z[k + 3] * _x[k + 3] + _z[k + 4] * _x[k + 4];
242 ts[2] = (double)Clock() - ts[2];
245 /* loop 3 inner prod */
248 ts[3] = (double)Clock();
250 for (k = 1; k <= 1000; k++)
254 ts[3] = (double)Clock() - ts[3];
257 /* loop 4 banded linear equarions */
260 ts[4] = (double)Clock();
261 for (l = 7; l <= 107; l += 50)
264 for (j = 30; j <= 870; j += 5)
266 _x[l - 1] -= _x[lw++] * _y[j];
268 _x[l - 1] = _y[5] * _x[l - 1];
270 ts[4] = (double)Clock() - ts[4];
271 for (l = 7; l <= 107; l += 50)
273 cksum[4] += (double)l * _x[l - 1];
276 /* loop 5 tri-diagonal elimination, below diagonal */
279 ts[5] = (double)Clock();
280 for (i = 2; i <= 998; i += 3)
282 _x[i] = _z[i] * (_y[i] - _x[i - 1]);
283 _x[i + 1] = _z[i + 1] * (_y[i + 1] - _x[i]);
284 _x[i + 2] = _z[i + 2] * (_y[i + 2] - _x[i + 1]);
286 ts[5] = (double)Clock() - ts[5];
287 for (i = 2; i <= 1000; i++)
289 cksum[5] += (double)i * _x[i];
292 /* loop 6 tri-diagonal elimination, above diagonal */
295 ts[6] = (double)Clock();
296 for (j = 3; j <= 999; j += 3)
299 _x[i] = _x[i] - _z[i] * _x[i + 1];
300 _x[i - 1] = _x[i - 1] - _z[i - 1] * _x[i];
301 _x[i - 2] = _x[i - 2] - _z[i - 2] * _x[i - 1];
303 ts[6] = (double)Clock() - ts[6];
304 for (j = 1; j <= 999; j++)
307 cksum[6] += (double)j * _x[l];
310 /* loop 7 equation of state excerpt */
313 ts[7] = (double)Clock();
314 for (m = 1; m <= 120; m++)
316 _x[m] = _u[m] + r * (_z[m] + r * _y[m]) + t * (_u[m + 3] + r * (_u[m + 2] + r * _u[m + 1]) + t * (_u[m + 6] + r * (_u[m + 5] + r * _u[m + 4])));
318 ts[7] = (double)Clock() - ts[7];
319 for (m = 1; m <= 120; m++)
321 cksum[7] += (double)m * _x[m];
324 /* loop 8 p.d.e. integration */
327 ts[8] = (double)Clock();
330 for (kx = 2; kx <= 3; kx++)
332 for (ky = 2; ky <= 21; ky++)
334 du1 = _u1[kx][ky + 1][nl1] - _u1[kx][ky - 1][nl1];
335 du2 = _u2[kx][ky + 1][nl1] - _u2[kx][ky - 1][nl1];
336 du3 = _u3[kx][ky + 1][nl1] - _u3[kx][ky - 1][nl1];
337 _u1[kx][ky][nl2] = _u1[kx][ky][nl1] + a11 * du1 + a12 * du2 + a13 * du3 + sig * (_u1[kx + 1][ky][nl1]
338 - 2.0 * _u1[kx][ky][nl1] + _u1[kx - 1][ky][nl1]);
339 _u2[kx][ky][nl2] = _u2[kx][ky][nl1] + a21 * du1 + a22 * du2 + a23 * du3 + sig * (_u2[kx + 1][ky][nl1]
340 - 2.0 * _u2[kx][ky][nl1] + _u2[kx - 1][ky][nl1]);
341 _u3[kx][ky][nl2] = _u3[kx][ky][nl1] + a31 * du1 + a32 * du2 + a33 * du3 + sig * (_u3[kx + 1][ky][nl1]
342 - 2.0 * _u3[kx][ky][nl1] + _u3[kx - 1][ky][nl1]);
345 ts[8] = (double)Clock() - ts[8];
346 for (i = 1; i <= 2; i++)
348 for (kx = 2; kx <= 3; kx++)
350 for (ky = 2; ky <= 21; ky++)
352 cksum[8] += (double)kx * (double)ky * (double)i * (_u1[kx][ky][i] + _u2[kx][ky][i] + _u3[kx][ky][i]);
357 /* loop 9 integrate predictors */
360 ts[9] = (double)Clock();
361 for (i = 1; i <= 100; i++)
363 _px[1][i] = b28 * _px[13][i] + b27 * _px[12][i] + b26 * _px[11][i] + b25 * _px[10][i] + b24 * _px[9][i] +
364 b23 * _px[8][i] + b22 * _px[7][i] + c0 * (_px[5][i] + _px[6][i]) + _px[3][i];
366 ts[9] = (double)Clock() - ts[9];
367 for (i = 1; i <= 100; i++)
369 cksum[9] += (double)i * _px[1][i];
372 /* loop 10 difference predictors */
375 ts[10] = (double)Clock();
376 for (i = 1; i <= 100; i++)
389 ar = cr - _px[10][i];
391 br = ar - _px[11][i];
393 cr = br - _px[12][i];
395 _px[14][i] = cr - _px[13][i];
398 ts[10] = (double)Clock() - ts[10];
399 for (i = 1; i <= 100; i++)
401 for (k = 5; k <= 14; k++)
403 cksum[10] += (double)k * (double)i * _px[k][i];
407 /* loop 11 first sum. */
410 ts[11] = (double)Clock();
412 for (k = 2; k <= 1000; k++)
414 _x[k] = _x[k - 1] + _y[k];
416 ts[11] = (double)Clock() - ts[11];
417 for (k = 1; k <= 1000; k++)
419 cksum[11] += (double)k * _x[k];
422 /* loop 12 first diff. */
425 ts[12] = (double)Clock();
426 for (k = 1; k <= 999; k++)
428 _x[k] = _y[k + 1] - _y[k];
430 ts[12] = (double)Clock() - ts[12];
431 for (k = 1; k <= 999; k++)
433 cksum[12] += (double)k * _x[k];
436 /* loop 13 2-d particle pusher */
439 ts[13] = (double)Clock();
440 for (ip = 1; ip <= 128; ip++)
444 _p[3][ip] += _b[i1][j1];
445 _p[4][ip] += _c[i1][j1];
446 _p[1][ip] += _p[3][ip];
447 _p[2][ip] += _p[4][ip];
448 // Each element of m_p, m_b and m_c is initialized to 1.00025 in Init().
449 // From the assignments above,
450 // i2 = m_p[1][ip] = m_p[1][ip] + m_p[3][ip] = m_p[1][ip] + m_p[3][ip] + m_b[i1][j1] = 1 + 1 + 1 = 3
451 // j2 = m_p[2][ip] = m_p[2][ip] + m_p[4][ip] = m_p[2][ip] + m_p[4][ip] + m_c[i1][j1] = 1 + 1 + 1 = 3
454 // Accessing m_y, m_z upto 35
455 _p[1][ip] += _y[i2 + 32];
456 _p[2][ip] += _z[j2 + 32];
462 ts[13] = (double)Clock() - ts[13];
463 for (ip = 1; ip <= 128; ip++)
465 cksum[13] += (double)ip * (_p[3][ip] + _p[4][ip] + _p[1][ip] + _p[2][ip]);
467 for (k = 1; k <= 64; k++)
469 for (ix = 1; ix <= 8; ix++)
471 cksum[13] += (double)k * (double)ix * _h[k][ix];
475 /* loop 14 1-d particle pusher */
478 ts[14] = (double)Clock();
479 for (k = 1; k <= 150; k++)
481 // m_grd[150] = 13.636
482 // Therefore ix <= 13
485 _vx[k] += _ex[ix] + (_xx[k] - xi) * _dex[ix];
486 _xx[k] += _vx[k] + flx;
490 ir = System.Math.Abs(ir % 64);
492 // ir < 64 since ir = ir % 64
493 // So m_rh is accessed upto 64
494 _rh[ir] += 1.0 - rx1;
497 ts[14] = (double)Clock() - ts[14];
498 for (k = 1; k <= 150; k++)
500 cksum[14] += (double)k * (_vx[k] + _xx[k]);
502 for (k = 1; k <= 67; k++)
504 cksum[14] += (double)k * _rh[k];
507 /* time the clock call */
509 ts[15] = (double)Clock();
510 ts[15] = (double)Clock() - ts[15];
512 /* scale= set to convert time to micro-seconds */
515 rt[15] = ts[15] * scale;
519 for (k = 1; k <= nt; k++)
521 rt[k] = (ts[k] - ts[15]) * scale;
523 mops[k] = _nrops[k] * _loops[k];
524 s += (double)mops[k];
528 rpm[k] = (double)mops[k] / rt[k];
535 // Ensure that the array elements are live-out
547 for (k = 1; k <= 1000; k++)
554 for (k = 1; k <= 500; k++)
559 for (k = 1; k <= 15; k++)
561 for (l = 1; l <= 100; l++)
568 for (j = 1; j < 6; j++)
570 for (k = 1; k < 23; k++)
572 for (l = 1; l < 3; l++)
575 _u2[j][k][l] = k + k;
576 _u3[j][k][l] = k + k + k;
581 for (j = 1; j < 65; j++)
583 for (k = 1; k < 9; k++)
591 for (j = 1; j < 6; j++)
600 for (j = 1; j < 5; j++)
602 for (k = 1; k < 513; k++)
608 for (j = 1; j < 193; j++)
613 for (j = 1; j < 68; j++)
615 _ex[j] = _rh[j] = _dex[j] = (double)j;
618 for (j = 1; j < 151; j++)
622 _grd[j] = (double)(j / 8 + 3);
627 public static void Test()
629 var lloops = new LLoops();
630 foreach (var iteration in Benchmark.Iterations)
632 using (iteration.StartMeasurement())
639 private bool TestBase()
641 bool result = Bench();
645 public static int Main()
647 var lloops = new LLoops();
648 bool result = lloops.TestBase();
649 return (result ? 100 : -1);