modules/gles3/performance/es3pShaderOperatorTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * drawElements Quality Program OpenGL ES 3.0 Module
   3  * -------------------------------------------------
   4  *
   5  * Copyright 2014 The Android Open Source Project
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief Shader operator performance tests.
  22  *//*--------------------------------------------------------------------*/
  23
  24 #include "es3pShaderOperatorTests.hpp"
  25 #include "glsCalibration.hpp"
  26 #include "gluShaderUtil.hpp"
  27 #include "gluShaderProgram.hpp"
  28 #include "gluPixelTransfer.hpp"
  29 #include "tcuTestLog.hpp"
  30 #include "tcuRenderTarget.hpp"
  31 #include "tcuCommandLine.hpp"
  32 #include "tcuSurface.hpp"
  33 #include "deStringUtil.hpp"
  34 #include "deSharedPtr.hpp"
  35 #include "deClock.h"
  36 #include "deMath.h"
  37
  38 #include "glwEnums.hpp"
  39 #include "glwFunctions.hpp"
  40
  41 #include <map>
  42 #include <algorithm>
  43 #include <limits>
  44 #include <set>
  45
  46 namespace deqp
  47 {
  48 namespace gles3
  49 {
  50 namespace Performance
  51 {
  52
  53 using namespace gls;
  54 using namespace glu;
  55 using tcu::Vec2;
  56 using tcu::Vec4;
  57 using tcu::TestLog;
  58 using de::SharedPtr;
  59
  60 using std::string;
  61 using std::vector;
  62
  63 #define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)
  64
  65 // Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
  66 static const int        DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD   = 3;
  67 // How many different workload sizes are used by OperatorPerformanceCase.
  68 static const int        NUM_WORKLOADS                                                   = 8;
  69 // Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
  70 static const int        MAX_WORKLOAD_SIZE                                               = 1<<29;
  71
  72 // BinaryOpCase-specific constants for shader generation.
  73 static const int        BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS       = 4;
  74 static const int        BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT        = 2;
  75 static const int        BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT          = 4;
  76
  77 // FunctionCase-specific constants for shader generation.
  78 static const int        FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS                      = 4;
  79
  80 static const char* const s_swizzles[][4] =
  81 {
  82         { "x", "yx", "yzx", "wzyx" },
  83         { "y", "zy", "wyz", "xwzy" },
  84         { "z", "wy", "zxy", "yzwx" },
  85         { "w", "xw", "yxw", "zyxw" }
  86 };
  87
  88 template <int N>
  89 static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data)
  90 {
  91         tcu::Vector<float, N> sum(0.0f);
  92         for (int i = 0; i < (int)data.size(); i++)
  93                 sum += data[i];
  94         return sum / tcu::Vector<float, N>((float)data.size());
  95 }
  96
  97 static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
  98 {
  99         switch (n)
 100         {
 101                 case 1: gl.uniform1fv(location, count, data); break;
 102                 case 2: gl.uniform2fv(location, count, data); break;
 103                 case 3: gl.uniform3fv(location, count, data); break;
 104                 case 4: gl.uniform4fv(location, count, data); break;
 105                 default: DE_ASSERT(false);
 106         }
 107 }
 108
 109 static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data)
 110 {
 111         switch (n)
 112         {
 113                 case 1: gl.uniform1iv(location, count, data); break;
 114                 case 2: gl.uniform2iv(location, count, data); break;
 115                 case 3: gl.uniform3iv(location, count, data); break;
 116                 case 4: gl.uniform4iv(location, count, data); break;
 117                 default: DE_ASSERT(false);
 118         }
 119 }
 120
 121 static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
 122 {
 123         switch (n)
 124         {
 125                 case 2: gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]); break;
 126                 case 3: gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]); break;
 127                 case 4: gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]); break;
 128                 default: DE_ASSERT(false);
 129         }
 130 }
 131
 132 static glu::DataType getDataTypeFloatOrVec (int size)
 133 {
 134         return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
 135 }
 136
 137 static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def)
 138 {
 139         const int cmdLineVal = cmdLine.getTestIterationCount();
 140         return cmdLineVal > 0 ? cmdLineVal : def;
 141 }
 142
 143 static string lineParamsString (const LineParameters& params)
 144 {
 145         return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
 146 }
 147
 148 namespace
 149 {
 150
 151 /*--------------------------------------------------------------------*//*!
 152  * \brief Abstract class for measuring shader operator performance.
 153  *
 154  * This class draws multiple times with different workload sizes (set
 155  * via a uniform, by subclass). Time for each frame is measured, and the
 156  * slope of the workload size vs frame time data is estimated. This slope
 157  * tells us the estimated increase in frame time caused by a workload
 158  * increase of 1 unit (what 1 workload unit means is up to subclass).
 159  *
 160  * Generally, the shaders contain not just the operation we're interested
 161  * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
 162  * eliminate this cost, we actually do the stuff described in the above
 163  * paragraph with multiple programs (usually two), which contain different
 164  * kinds of workload (e.g. different loop contents). Then we can (in
 165  * theory) compute the cost of just one operation in a subclass-dependent
 166  * manner.
 167  *
 168  * At this point, the result tells us the increase in frame time caused
 169  * by the addition of one operation. Dividing this by the amount of
 170  * draw calls in a frame, and further by the amount of vertices or
 171  * fragments in a draw call, we get the time cost of one operation.
 172  *
 173  * In reality, there sometimes isn't just a trivial linear dependence
 174  * between workload size and frame time. Instead, there tends to be some
 175  * amount of initial "free" operations. That is, it may be that all
 176  * workload sizes below some positive integer C yield the same frame time,
 177  * and only workload sizes beyond C increase the frame time in a supposedly
 178  * linear manner. Graphically, this means that there graph consists of two
 179  * parts: a horizontal left part, and a linearly increasing right part; the
 180  * right part starts where the left parts ends. The principal task of these
 181  * tests is to look at the slope of the increasing right part. Additionally
 182  * an estimate for the amount of initial free operations is calculated.
 183  * Note that it is also normal to get graphs where the horizontal left part
 184  * is of zero width, i.e. there are no free operations.
 185  *//*--------------------------------------------------------------------*/
 186 class OperatorPerformanceCase : public tcu::TestCase
 187 {
 188 public:
 189         enum CaseType
 190         {
 191                 CASETYPE_VERTEX = 0,
 192                 CASETYPE_FRAGMENT,
 193
 194                 CASETYPE_LAST
 195         };
 196
 197         struct InitialCalibration
 198         {
 199                 int initialNumCalls;
 200                 InitialCalibration (void) : initialNumCalls(1) {}
 201         };
 202
 203         typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;
 204
 205                                                                 OperatorPerformanceCase         (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
 206                                                                                                                          CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage);
 207                                                                 ~OperatorPerformanceCase        (void);
 208
 209         void                                            init                                            (void);
 210         void                                            deinit                                          (void);
 211
 212         IterateResult                           iterate                                         (void);
 213
 214         struct AttribSpec
 215         {
 216                 AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_)
 217                         : name          (name_)
 218                         , p00           (p00_)
 219                         , p01           (p01_)
 220                         , p10           (p10_)
 221                         , p11           (p11_)
 222                 {
 223                 }
 224
 225                 AttribSpec (void) {}
 226
 227                 std::string             name;
 228                 tcu::Vec4               p00;    //!< Bottom left.
 229                 tcu::Vec4               p01;    //!< Bottom right.
 230                 tcu::Vec4               p10;    //!< Top left.
 231                 tcu::Vec4               p11;    //!< Top right.
 232         };
 233
 234 protected:
 235         struct ProgramContext
 236         {
 237                 string                          vertShaderSource;
 238                 string                          fragShaderSource;
 239                 vector<AttribSpec>      attributes;
 240
 241                 string                          description;
 242
 243                 ProgramContext (void) {}
 244                 ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc)
 245                         : vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {}
 246         };
 247
 248         virtual vector<ProgramContext>  generateProgramData                                     (void) const = 0;
 249         //! Sets program-specific uniforms that don't depend on the workload size.
 250         virtual void                                    setGeneralUniforms                                      (deUint32 program) const = 0;
 251         //! Sets the uniform(s) that specifies the workload size in the shader.
 252         virtual void                                    setWorkloadSizeUniform                          (deUint32 program, int workload) const = 0;
 253         //! Computes the cost of a single operation, given the workload costs per program.
 254         virtual float                                   computeSingleOperationTime                      (const vector<float>& perProgramWorkloadCosts) const = 0;
 255         //! Logs a human-readable description of what computeSingleOperationTime does.
 256         virtual void                                    logSingleOperationCalculationInfo       (void) const = 0;
 257
 258         glu::RenderContext&                             m_renderCtx;
 259
 260         CaseType                                                m_caseType;
 261
 262 private:
 263         enum State
 264         {
 265                 STATE_CALIBRATING = 0,          //!< Calibrate draw call count, using first program in m_programs, with workload size 1.
 266                 STATE_FIND_HIGH_WORKLOAD,       //!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
 267                 STATE_MEASURING,                        //!< Do actual measurements, for each program in m_programs.
 268                 STATE_REPORTING,                        //!< Measurements are done; calculate results and log.
 269                 STATE_FINISHED,                         //!< All done.
 270
 271                 STATE_LAST
 272         };
 273
 274         struct WorkloadRecord
 275         {
 276                 int                             workloadSize;
 277                 vector<float>   frameTimes; //!< In microseconds.
 278
 279                                 WorkloadRecord  (int workloadSize_)                                             : workloadSize(workloadSize_) {}
 280                 bool    operator<               (const WorkloadRecord& other) const             { return this->workloadSize < other.workloadSize; }
 281                 void    addFrameTime    (float time)                                                    { frameTimes.push_back(time); }
 282                 float   getMedianTime   (void) const
 283                 {
 284                         vector<float> times = frameTimes;
 285                         std::sort(times.begin(), times.end());
 286                         return times.size() % 2 == 0 ?
 287                                         (times[times.size()/2-1] + times[times.size()/2])*0.5f :
 288                                         times[times.size()/2];
 289                 }
 290         };
 291
 292         void                                                            prepareProgram                          (int progNdx);                                  //!< Sets attributes and uniforms for m_programs[progNdx].
 293         void                                                            prepareWorkload                         (int progNdx, int workload);    //!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
 294         void                                                            prepareNextRound                        (void);                                                 //!< Increases workload and/or updates m_state.
 295         void                                                            render                                          (int numDrawCalls);
 296         deUint64                                                        renderAndMeasure                        (int numDrawCalls);
 297         void                                                            adjustAndLogGridAndViewport     (void);                                                 //!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.
 298
 299         vector<Vec2>                                            getWorkloadMedianDataPoints     (int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]
 300
 301         const int                                                       m_numMeasurementsPerWorkload;
 302         const int                                                       m_numWorkloads;                         //!< How many different workload sizes are used for measurement for each program.
 303
 304         int                                                                     m_workloadNdx;                          //!< Runs from 0 to m_numWorkloads-1.
 305
 306         int                                                                     m_workloadMeasurementNdx;
 307         vector<vector<WorkloadRecord> >         m_workloadRecordsFindHigh;      //!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
 308         vector<vector<WorkloadRecord> >         m_workloadRecords;                      //!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.
 309
 310         State                                                           m_state;
 311         int                                                                     m_measureProgramNdx;            //!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.
 312
 313         vector<int>                                                     m_highWorkloadSizes;            //!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.
 314
 315         TheilSenCalibrator                                      m_calibrator;
 316         InitialCalibrationStorage                       m_initialCalibrationStorage;
 317
 318         int                                                                     m_viewportWidth;
 319         int                                                                     m_viewportHeight;
 320         int                                                                     m_gridSizeX;
 321         int                                                                     m_gridSizeY;
 322
 323         vector<ProgramContext>                          m_programData;
 324         vector<SharedPtr<ShaderProgram> >       m_programs;
 325
 326         std::vector<deUint32>                           m_attribBuffers;
 327 };
 328
 329 static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
 330 {
 331         return v0 + (v2-v0)*x + (v1-v0)*y;
 332 }
 333
 334 static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
 335 {
 336         // \note Top left fill rule.
 337         if (x + y < 1.0f)
 338                 return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
 339         else
 340                 return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
 341 }
 342
 343 static inline int getNumVertices (int gridSizeX, int gridSizeY)
 344 {
 345         return gridSizeX * gridSizeY * 2 * 3;
 346 }
 347
 348 static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec)
 349 {
 350         const int numComponents = 4;
 351
 352         DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
 353         dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);
 354
 355         {
 356                 int dstNdx = 0;
 357
 358                 for (int baseY = 0; baseY < gridSizeY; baseY++)
 359                 for (int baseX = 0; baseX < gridSizeX; baseX++)
 360                 {
 361                         const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
 362                         const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
 363                         const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
 364                         const float yf1 = (float)(baseY + 1) / (float)gridSizeY;
 365
 366 #define ADD_VERTEX(XF, YF)                                                                              \
 367         for (int compNdx = 0; compNdx < numComponents; compNdx++)       \
 368                 dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))
 369
 370                         ADD_VERTEX(xf0, yf0);
 371                         ADD_VERTEX(xf1, yf0);
 372                         ADD_VERTEX(xf0, yf1);
 373
 374                         ADD_VERTEX(xf1, yf0);
 375                         ADD_VERTEX(xf1, yf1);
 376                         ADD_VERTEX(xf0, yf1);
 377
 378 #undef ADD_VERTEX
 379                 }
 380         }
 381 }
 382
 383 static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b)
 384 {
 385         return (a.offset - b.offset) / (b.coefficient - a.coefficient);
 386 }
 387
 388 static int numDistinctX (const vector<Vec2>& data)
 389 {
 390         std::set<float> xs;
 391         for (int i = 0; i < (int)data.size(); i++)
 392                 xs.insert(data[i].x());
 393         return (int)xs.size();
 394 }
 395
 396 static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data)
 397 {
 398         const Vec2      mid                                     = mean(data);
 399
 400         float           slopeNumerator          = 0.0f;
 401         float           slopeDenominator        = 0.0f;
 402
 403         for (int i = 0; i < (int)data.size(); i++)
 404         {
 405                 const Vec2 diff = data[i] - mid;
 406
 407                 slopeNumerator          += diff.x()*diff.y();
 408                 slopeDenominator        += diff.x()*diff.x();
 409         }
 410
 411         const float slope       = slopeNumerator / slopeDenominator;
 412         const float offset      = mid.y() - slope*mid.x();
 413
 414         return gls::LineParameters(offset, slope);
 415 }
 416
 417 static float simpleLinearRegressionError (const vector<Vec2>& data)
 418 {
 419         if (numDistinctX(data) <= 2)
 420                 return 0.0f;
 421         else
 422         {
 423                 const gls::LineParameters       estimator       = simpleLinearRegression(data);
 424                 float                                           error           = 0.0f;
 425
 426                 for (int i = 0; i < (int)data.size(); i++)
 427                 {
 428                         const float estY = estimator.offset + estimator.coefficient*data[i].x();
 429                         const float diff = estY - data[i].y();
 430                         error += diff*diff;
 431                 }
 432
 433                 return error / (float)data.size();
 434         }
 435 }
 436
 437 static float verticalVariance (const vector<Vec2>& data)
 438 {
 439         if (numDistinctX(data) <= 2)
 440                 return 0.0f;
 441         else
 442         {
 443                 const float             meanY = mean(data).y();
 444                 float                   error = 0.0f;
 445
 446                 for (int i = 0; i < (int)data.size(); i++)
 447                 {
 448                         const float diff = meanY - data[i].y();
 449                         error += diff*diff;
 450                 }
 451
 452                 return error / (float)data.size();
 453         }
 454 }
 455
 456 /*--------------------------------------------------------------------*//*!
 457  * \brief Find the x coord that divides the input data into two slopes.
 458  *
 459  * The operator performance measurements tend to produce results where
 460  * we get small operation counts "for free" (e.g. because the operations
 461  * are performed during some memory transfer overhead or something),
 462  * resulting in a curve with two parts: an initial horizontal line segment,
 463  * and a rising line.
 464  *
 465  * This function finds the x coordinate that divides the input data into
 466  * two parts such that the sum of the mean square errors for the
 467  * least-squares estimated lines for the two parts is minimized, under the
 468  * additional condition that the left line is horizontal.
 469  *
 470  * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
 471  * is the right line, and the rest of data is the left line.
 472  *//*--------------------------------------------------------------------*/
 473 static float findSlopePivotX (const vector<Vec2>& data)
 474 {
 475         std::set<float> xCoords;
 476         for (int i = 0; i < (int)data.size(); i++)
 477                 xCoords.insert(data[i].x());
 478
 479         float                   lowestError             = std::numeric_limits<float>::infinity();
 480         float                   bestPivotX              = -std::numeric_limits<float>::infinity();
 481
 482         for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
 483         {
 484                 vector<Vec2> leftData;
 485                 vector<Vec2> rightData;
 486                 for (int i = 0; i < (int)data.size(); i++)
 487                 {
 488                         if (data[i].x() < *pivotX)
 489                                 leftData.push_back(data[i]);
 490                         else
 491                                 rightData.push_back(data[i]);
 492                 }
 493
 494                 if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
 495                         break;
 496
 497                 {
 498                         const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);
 499
 500                         if (totalError < lowestError)
 501                         {
 502                                 lowestError = totalError;
 503                                 bestPivotX = *pivotX;
 504                         }
 505                 }
 506         }
 507
 508         DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());
 509
 510         return bestPivotX;
 511 }
 512
 513 struct SegmentedEstimator
 514 {
 515         float                                   pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
 516         gls::LineParameters             left;
 517         gls::LineParameters             right;
 518         SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {}
 519 };
 520
 521 /*--------------------------------------------------------------------*//*!
 522  * \brief Compute line estimators for (potentially) two-segment data.
 523  *
 524  * Splits the given data into left and right parts (using findSlopePivotX)
 525  * and returns the line estimates for them.
 526  *
 527  * Sometimes, however (especially in fragment shader cases) the data is
 528  * in fact not segmented, but a straight line. This function attempts to
 529  * detect if this the case, and if so, sets left.offset = right.offset and
 530  * left.slope = 0, meaning essentially that the initial "flat" part of the
 531  * data has zero width.
 532  *//*--------------------------------------------------------------------*/
 533 static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data)
 534 {
 535         const float             pivotX = findSlopePivotX(data);
 536         vector<Vec2>    leftData;
 537         vector<Vec2>    rightData;
 538
 539         for (int i = 0; i < (int)data.size(); i++)
 540         {
 541                 if (data[i].x() < pivotX)
 542                         leftData.push_back(data[i]);
 543                 else
 544                         rightData.push_back(data[i]);
 545         }
 546
 547         {
 548                 const gls::LineParameters leftLine              = gls::theilSenLinearRegression(leftData);
 549                 const gls::LineParameters rightLine             = gls::theilSenLinearRegression(rightData);
 550
 551                 if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f)
 552                 {
 553                         // Left data doesn't seem credible; assume the data is just a single line.
 554                         const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
 555                         return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity());
 556                 }
 557                 else
 558                         return SegmentedEstimator(leftLine, rightLine, pivotX);
 559         }
 560 }
 561
 562 OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
 563                                                                                                   CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage)
 564         : tcu::TestCase                                 (testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
 565         , m_renderCtx                                   (renderCtx)
 566         , m_caseType                                    (caseType)
 567         , m_numMeasurementsPerWorkload  (getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
 568         , m_numWorkloads                                (numWorkloads)
 569         , m_workloadNdx                                 (-1)
 570         , m_workloadMeasurementNdx              (-1)
 571         , m_state                                               (STATE_LAST)
 572         , m_measureProgramNdx                   (-1)
 573         , m_initialCalibrationStorage   (initialCalibrationStorage)
 574         , m_viewportWidth                               (caseType == CASETYPE_VERTEX    ? 32    : renderCtx.getRenderTarget().getWidth())
 575         , m_viewportHeight                              (caseType == CASETYPE_VERTEX    ? 32    : renderCtx.getRenderTarget().getHeight())
 576         , m_gridSizeX                                   (caseType == CASETYPE_FRAGMENT  ? 1             : 100)
 577         , m_gridSizeY                                   (caseType == CASETYPE_FRAGMENT  ? 1             : 100)
 578 {
 579         DE_ASSERT(m_numWorkloads > 0);
 580 }
 581
 582 OperatorPerformanceCase::~OperatorPerformanceCase (void)
 583 {
 584         if (!m_attribBuffers.empty())
 585         {
 586                 m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
 587                 m_attribBuffers.clear();
 588         }
 589 }
 590
 591 static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget)
 592 {
 593         log << TestLog::Section("RenderTarget", "Render target")
 594                 << TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage
 595                 << TestLog::Message << "bits:"
 596                                                         << " R" << renderTarget.getPixelFormat().redBits
 597                                                         << " G" << renderTarget.getPixelFormat().greenBits
 598                                                         << " B" << renderTarget.getPixelFormat().blueBits
 599                                                         << " A" << renderTarget.getPixelFormat().alphaBits
 600                                                         << " D" << renderTarget.getDepthBits()
 601                                                         << " S" << renderTarget.getStencilBits()
 602                                                         << TestLog::EndMessage;
 603
 604         if (renderTarget.getNumSamples() != 0)
 605                 log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
 606         else
 607                 log << TestLog::Message << "No MSAA" << TestLog::EndMessage;
 608
 609         log << TestLog::EndSection;
 610 }
 611
 612 vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const
 613 {
 614         const vector<WorkloadRecord>&   records = m_workloadRecords[progNdx];
 615         vector<Vec2>                                    result;
 616
 617         for (int i = 0; i < (int)records.size(); i++)
 618                 result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));
 619
 620         return result;
 621 }
 622
 623 void OperatorPerformanceCase::prepareProgram (int progNdx)
 624 {
 625         DE_ASSERT(progNdx < (int)m_programs.size());
 626         DE_ASSERT(m_programData.size() == m_programs.size());
 627
 628         const glw::Functions&   gl                      = m_renderCtx.getFunctions();
 629         const ShaderProgram&    program         = *m_programs[progNdx];
 630
 631         vector<AttribSpec>              attributes      = m_programData[progNdx].attributes;
 632
 633         attributes.push_back(AttribSpec("a_position",
 634                                                                         Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
 635                                                                         Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
 636                                                                         Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
 637                                                                         Vec4( 1.0f,  1.0f, 0.0f, 1.0f)));
 638
 639         DE_ASSERT(program.isOk());
 640
 641         // Generate vertices.
 642         if (!m_attribBuffers.empty())
 643                 gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
 644         m_attribBuffers.resize(attributes.size(), 0);
 645         gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
 646         GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");
 647
 648         for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
 649         {
 650                 std::vector<float> vertices;
 651                 generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);
 652
 653                 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
 654                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
 655                 GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
 656         }
 657
 658         // Setup attribute bindings.
 659         for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
 660         {
 661                 int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());
 662
 663                 if (location >= 0)
 664                 {
 665                         gl.enableVertexAttribArray(location);
 666                         gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
 667                         gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
 668                 }
 669         }
 670         GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");
 671
 672         gl.useProgram(program.getProgram());
 673         setGeneralUniforms(program.getProgram());
 674         gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
 675 }
 676
 677 void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload)
 678 {
 679         setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
 680         render(m_calibrator.getCallCount());
 681 }
 682
 683 void OperatorPerformanceCase::prepareNextRound (void)
 684 {
 685         DE_ASSERT(m_state == STATE_CALIBRATING                  ||
 686                           m_state == STATE_FIND_HIGH_WORKLOAD   ||
 687                           m_state == STATE_MEASURING);
 688
 689         TestLog& log = m_testCtx.getLog();
 690
 691         if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
 692         {
 693                 m_measureProgramNdx = 0;
 694                 m_state = STATE_FIND_HIGH_WORKLOAD;
 695         }
 696
 697         if (m_state == STATE_CALIBRATING)
 698                 prepareWorkload(0, 1);
 699         else if (m_state == STATE_FIND_HIGH_WORKLOAD)
 700         {
 701                 vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx];
 702
 703                 if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime())
 704                 {
 705                         int workloadSize;
 706
 707                         if (records.empty())
 708                                 workloadSize = 1;
 709                         else
 710                         {
 711                                 workloadSize = records.back().workloadSize*2;
 712
 713                                 if (workloadSize > MAX_WORKLOAD_SIZE)
 714                                 {
 715                                         log << TestLog::Message << "Even workload size " << records.back().workloadSize
 716                                                                                         << " doesn't give high enough frame time for program " << m_measureProgramNdx
 717                                                                                         << ". Can't get sensible result." << TestLog::EndMessage;
 718                                         MEASUREMENT_FAIL();
 719                                 }
 720                         }
 721
 722                         records.push_back(WorkloadRecord(workloadSize));
 723                         prepareWorkload(0, workloadSize);
 724                         m_workloadMeasurementNdx = 0;
 725                 }
 726                 else
 727                 {
 728                         m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
 729                         m_measureProgramNdx++;
 730
 731                         if (m_measureProgramNdx >= (int)m_programs.size())
 732                         {
 733                                 m_state = STATE_MEASURING;
 734                                 m_workloadNdx = -1;
 735                                 m_measureProgramNdx = 0;
 736                         }
 737
 738                         prepareProgram(m_measureProgramNdx);
 739                         prepareNextRound();
 740                 }
 741         }
 742         else
 743         {
 744                 m_workloadNdx++;
 745
 746                 if (m_workloadNdx < m_numWorkloads)
 747                 {
 748                         DE_ASSERT(m_numWorkloads > 1);
 749                         const int highWorkload  = m_highWorkloadSizes[m_measureProgramNdx];
 750                         const int workload              = highWorkload > m_numWorkloads ?
 751                                                                                 1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) :
 752                                                                                 1 + m_workloadNdx;
 753
 754                         prepareWorkload(m_measureProgramNdx, workload);
 755
 756                         m_workloadMeasurementNdx = 0;
 757
 758                         m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
 759                 }
 760                 else
 761                 {
 762                         m_measureProgramNdx++;
 763
 764                         if (m_measureProgramNdx < (int)m_programs.size())
 765                         {
 766                                 m_workloadNdx = -1;
 767                                 m_workloadMeasurementNdx = 0;
 768                                 prepareProgram(m_measureProgramNdx);
 769                                 prepareNextRound();
 770                         }
 771                         else
 772                                 m_state = STATE_REPORTING;
 773                 }
 774         }
 775 }
 776
 777 void OperatorPerformanceCase::init (void)
 778 {
 779         TestLog&                                log             = m_testCtx.getLog();
 780         const glw::Functions&   gl              = m_renderCtx.getFunctions();
 781
 782         // Validate that we have sane grid and viewport setup.
 783         DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
 784         TCU_CHECK(de::inRange(m_viewportWidth,  1, m_renderCtx.getRenderTarget().getWidth()) &&
 785                           de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));
 786
 787         logRenderTargetInfo(log, m_renderCtx.getRenderTarget());
 788
 789         log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
 790         gl.enable(GL_BLEND);
 791         gl.blendEquation(GL_FUNC_ADD);
 792         gl.blendFunc(GL_ONE, GL_ONE);
 793
 794         // Generate programs.
 795         DE_ASSERT(m_programs.empty());
 796         m_programData = generateProgramData();
 797         DE_ASSERT(!m_programData.empty());
 798
 799         for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
 800         {
 801                 const string& vert = m_programData[progNdx].vertShaderSource;
 802                 const string& frag = m_programData[progNdx].fragShaderSource;
 803
 804                 m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));
 805
 806                 if (!m_programs.back()->isOk())
 807                 {
 808                         log << *m_programs.back();
 809                         TCU_FAIL("Compile failed");
 810                 }
 811         }
 812
 813         // Log all programs.
 814         for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
 815                 log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
 816                                 << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage
 817                                 << *m_programs[progNdx]
 818                         << TestLog::EndSection;
 819
 820         m_highWorkloadSizes.resize(m_programData.size());
 821         m_workloadRecordsFindHigh.resize(m_programData.size());
 822         m_workloadRecords.resize(m_programData.size());
 823
 824         m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
 825                                                                                         1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
 826         m_state = STATE_CALIBRATING;
 827
 828         prepareProgram(0);
 829         prepareNextRound();
 830 }
 831
 832 void OperatorPerformanceCase::deinit (void)
 833 {
 834         if (!m_attribBuffers.empty())
 835         {
 836                 m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
 837                 m_attribBuffers.clear();
 838         }
 839
 840         m_programs.clear();
 841 }
 842
 843 void OperatorPerformanceCase::render (int numDrawCalls)
 844 {
 845         const glw::Functions&   gl                              = m_renderCtx.getFunctions();
 846         const int                               numVertices             = getNumVertices(m_gridSizeX, m_gridSizeY);
 847
 848         for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
 849                 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 850
 851         glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
 852 }
 853
 854 deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls)
 855 {
 856         const deUint64 startTime = deGetMicroseconds();
 857         render(numDrawCalls);
 858         return deGetMicroseconds() - startTime;
 859 }
 860
 861 void OperatorPerformanceCase::adjustAndLogGridAndViewport (void)
 862 {
 863         TestLog& log = m_testCtx.getLog();
 864
 865         // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
 866         if (m_calibrator.getCallCount() == 1)
 867         {
 868                 const gls::MeasureState&        calibratorMeasure       = m_calibrator.getMeasureState();
 869                 const float                                     drawCallTime            = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
 870                 const float                                     targetDrawCallTime      = m_calibrator.getParameters().targetFrameTimeUs;
 871                 const float                                     targetRatio                     = targetDrawCallTime / drawCallTime;
 872
 873                 if (targetRatio < 0.95f)
 874                 {
 875                         // Reduce grid or viewport size assuming draw call time scales proportionally.
 876                         if (m_caseType == CASETYPE_VERTEX)
 877                         {
 878                                 const float targetRatioSqrt = deFloatSqrt(targetRatio);
 879                                 m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX);
 880                                 m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY);
 881                                 TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times");
 882                                 log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
 883                         }
 884                         else
 885                         {
 886                                 const float targetRatioSqrt = deFloatSqrt(targetRatio);
 887                                 m_viewportWidth  = (int)(targetRatioSqrt * (float)m_viewportWidth);
 888                                 m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
 889                                 TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times");
 890                                 log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
 891                         }
 892                 }
 893         }
 894
 895         prepareProgram(0);
 896
 897         // Log grid and viewport sizes.
 898         log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
 899         log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
 900 }
 901
 902 OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void)
 903 {
 904         const TheilSenCalibrator::State calibratorState = m_calibrator.getState();
 905
 906         if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
 907         {
 908                 if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
 909                         m_calibrator.recomputeParameters();
 910                 else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
 911                         m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
 912                 else
 913                         DE_ASSERT(false);
 914
 915                 if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
 916                 {
 917                         logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
 918                         adjustAndLogGridAndViewport();
 919                         prepareNextRound();
 920                         m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
 921                 }
 922         }
 923         else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
 924         {
 925                 if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
 926                 {
 927                         vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx];
 928                         records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
 929                         m_workloadMeasurementNdx++;
 930                 }
 931                 else
 932                         prepareNextRound();
 933         }
 934         else
 935         {
 936                 DE_ASSERT(m_state == STATE_REPORTING);
 937
 938                 TestLog&        log                             = m_testCtx.getLog();
 939                 const int       drawCallCount   = m_calibrator.getCallCount();
 940
 941                 {
 942                         // Compute per-program estimators for measurements.
 943                         vector<SegmentedEstimator> estimators;
 944                         for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
 945                                 estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));
 946
 947                         // Log measurements and their estimators for all programs.
 948                         for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
 949                         {
 950                                 const SegmentedEstimator&       estimator       = estimators[progNdx];
 951                                 const string                            progNdxStr      = de::toString(progNdx);
 952                                 vector<WorkloadRecord>          records         = m_workloadRecords[progNdx];
 953                                 std::sort(records.begin(), records.end());
 954
 955                                 {
 956                                         const tcu::ScopedLogSection section(log,
 957                                                                                                                 "Program" + progNdxStr + "Measurements",
 958                                                                                                                 "Measurements for program " + progNdxStr);
 959
 960                                         // Sample list of individual frame times.
 961
 962                                         log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times")
 963                                                 << TestLog::SampleInfo << TestLog::ValueInfo("Workload",        "Workload",             "",             QP_SAMPLE_VALUE_TAG_PREDICTOR)
 964                                                                                            << TestLog::ValueInfo("FrameTime",   "Frame time",   "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
 965                                                 << TestLog::EndSampleInfo;
 966
 967                                         for (int i = 0; i < (int)records.size(); i++)
 968                                                 for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
 969                                                         log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample;
 970
 971                                         log << TestLog::EndSampleList;
 972
 973                                         // Sample list of median frame times.
 974
 975                                         log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
 976                                                 << TestLog::SampleInfo << TestLog::ValueInfo("Workload",                "Workload",                             "",             QP_SAMPLE_VALUE_TAG_PREDICTOR)
 977                                                                                            << TestLog::ValueInfo("MedianFrameTime",     "Median frame time",    "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
 978                                                 << TestLog::EndSampleInfo;
 979
 980                                         for (int i = 0; i < (int)records.size(); i++)
 981                                                 log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample;
 982
 983                                         log << TestLog::EndSampleList;
 984
 985                                         log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);
 986
 987                                         if (estimator.pivotX > -std::numeric_limits<float>::infinity())
 988                                                 log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX
 989                                                                                                 << " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage
 990                                                         << TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left)
 991                                                                                                 << " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage;
 992                                         else
 993                                                 log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage;
 994                                 }
 995                         }
 996
 997                         for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
 998                         {
 999                                 if (estimators[progNdx].right.coefficient <= 0.0f)
1000                                 {
1001                                         log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
1002                                         MEASUREMENT_FAIL();
1003                                 }
1004                         }
1005
1006                         // \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
1007                         // incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
1008                         // of R.
1009                         //
1010                         // The measurements of any single program can't tell us the final result (time of single operation),
1011                         // so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
1012                         // subclass-defined manner.
1013                         //
1014                         // After that, microseconds per operation can be calculated as singleOperationTime / (D * R).
1015
1016                         {
1017                                 vector<float>   perProgramSlopes;
1018                                 for (int i = 0; i < (int)m_programs.size(); i++)
1019                                         perProgramSlopes.push_back(estimators[i].right.coefficient);
1020
1021                                 logSingleOperationCalculationInfo();
1022
1023                                 const float             maxSlope                                = *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
1024                                 const float             usecsPerFramePerOp              = computeSingleOperationTime(perProgramSlopes);
1025                                 const int               vertexOrFragmentCount   = m_caseType == CASETYPE_VERTEX ?
1026                                                                                                                         getNumVertices(m_gridSizeX, m_gridSizeY) :
1027                                                                                                                         m_viewportWidth*m_viewportHeight;
1028                                 const double    usecsPerDrawCallPerOp   = usecsPerFramePerOp / (double)drawCallCount;
1029                                 const double    usecsPerSingleOp                = usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
1030                                 const double    megaOpsPerSecond                = (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp;
1031                                 const int               numFreeOps                              = de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left,
1032                                                                                                                                                                                                          LineParameters(estimators[0].right.offset,
1033                                                                                                                                                                                                                                         usecsPerFramePerOp))));
1034
1035                                 log << TestLog::Integer("VertexOrFragmentCount",
1036                                                                                 "R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count",
1037                                                                                 "", QP_KEY_TAG_NONE, vertexOrFragmentCount)
1038
1039                                         << TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount)
1040
1041                                         << TestLog::Integer("VerticesOrFragmentsPerFrame",
1042                                                                                 "R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame",
1043                                                                                 "", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount)
1044
1045                                         << TestLog::Float("TimePerFramePerOp",
1046                                                                           "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments")
1047                                                                           + " (i.e. one frame) with one shader operation",
1048                                                                           "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)
1049
1050                                         << TestLog::Float("TimePerDrawcallPerOp",
1051                                                                           "Estimated cost of one draw call with one shader operation",
1052                                                                           "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)
1053
1054                                         << TestLog::Float("TimePerSingleOp",
1055                                                                           "Estimated cost of a single shader operation",
1056                                                                           "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp);
1057
1058                                 // \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
1059                                 //               for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
1060                                 //               following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
1061                                 if (usecsPerFramePerOp <= -0.1f*maxSlope)
1062                                 {
1063                                         log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
1064                                         MEASUREMENT_FAIL();
1065                                 }
1066                                 else if (usecsPerFramePerOp <= 0.001*maxSlope)
1067                                 {
1068                                         log << TestLog::Message << "Cost of operation seems to be approximately zero." << TestLog::EndMessage;
1069                                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1070                                 }
1071                                 else
1072                                 {
1073                                         log << TestLog::Float("OpsPerSecond",
1074                                                                                   "Operations per second",
1075                                                                                   "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond)
1076
1077                                                 << TestLog::Integer("NumFreeOps",
1078                                                                                         "Estimated number of \"free\" operations",
1079                                                                                         "", QP_KEY_TAG_PERFORMANCE, numFreeOps);
1080
1081                                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
1082                                 }
1083
1084                                 m_state = STATE_FINISHED;
1085                         }
1086                 }
1087
1088                 return STOP;
1089         }
1090
1091         return CONTINUE;
1092 }
1093
1094 // Binary operator case.
1095 class BinaryOpCase : public OperatorPerformanceCase
1096 {
1097 public:
1098                                                 BinaryOpCase                            (Context& context, const char* name, const char* description, const char* op,
1099                                                                                                          glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration);
1100
1101 protected:
1102         vector<ProgramContext>  generateProgramData                                     (void) const;
1103         void                                    setGeneralUniforms                                      (deUint32 program) const;
1104         void                                    setWorkloadSizeUniform                          (deUint32 program, int numOperations) const;
1105         float                                   computeSingleOperationTime                      (const vector<float>& perProgramOperationCosts) const;
1106         void                                    logSingleOperationCalculationInfo       (void) const;
1107
1108 private:
1109         enum ProgramID
1110         {
1111                 // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1112                 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1113                 PROGRAM_WITH_BIGGER_LOOP = 0,
1114                 PROGRAM_WITH_SMALLER_LOOP,
1115
1116                 PROGRAM_LAST
1117         };
1118
1119         ProgramContext                  generateSingleProgramData               (ProgramID) const;
1120
1121         const string                    m_op;
1122         const glu::DataType             m_type;
1123         const glu::Precision    m_precision;
1124         const bool                              m_useSwizzle;
1125 };
1126
1127 BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op,
1128                                                         glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration)
1129         : OperatorPerformanceCase       (context.getTestContext(), context.getRenderContext(), name, description,
1130                                                                  isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1131         , m_op                                          (op)
1132         , m_type                                        (type)
1133         , m_precision                           (precision)
1134         , m_useSwizzle                          (useSwizzle)
1135 {
1136 }
1137
1138 BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const
1139 {
1140         DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));
1141
1142         const bool                      isVertexCase    = m_caseType == CASETYPE_VERTEX;
1143         const char* const       precision               = glu::getPrecisionName(m_precision);
1144         const char* const       inputPrecision  = glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
1145         const char* const       typeName                = getDataTypeName(m_type);
1146
1147         std::ostringstream      vtx;
1148         std::ostringstream      frag;
1149         std::ostringstream&     op                              = isVertexCase ? vtx : frag;
1150
1151         vtx << "#version 300 es\n";
1152         frag << "#version 300 es\n"
1153                  << "layout (location = 0) out mediump vec4 o_color;\n";
1154
1155         // Attributes.
1156         vtx << "in highp vec4 a_position;\n";
1157         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1158                 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1159
1160         if (isVertexCase)
1161         {
1162                 vtx << "out mediump vec4 v_color;\n";
1163                 frag << "in mediump vec4 v_color;\n";
1164         }
1165         else
1166         {
1167                 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1168                 {
1169                         vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1170                         frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1171                 }
1172         }
1173
1174         op << "uniform mediump int u_numLoopIterations;\n";
1175         if (isVertexCase)
1176                 op << "uniform mediump float u_zero;\n";
1177
1178         vtx << "\n";
1179         vtx << "void main()\n";
1180         vtx << "{\n";
1181
1182         if (!isVertexCase)
1183                 vtx << "\tgl_Position = a_position;\n";
1184
1185         frag << "\n";
1186         frag << "void main()\n";
1187         frag << "{\n";
1188
1189         // Expression inputs.
1190         const char* const prefix = isVertexCase ? "a_" : "v_";
1191         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1192         {
1193                 const int       inSize          = getDataTypeScalarSize(m_type);
1194                 const bool      isInt           = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
1195                 const bool      cast            = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);
1196
1197                 op << "\t" << precision << " " << typeName << " in" << i << " = ";
1198
1199                 if (cast)
1200                         op << typeName << "(";
1201
1202                 op << prefix << "in" << i;
1203
1204                 if (m_useSwizzle)
1205                         op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1];
1206
1207                 if (cast)
1208                         op << ")";
1209
1210                 op << ";\n";
1211         }
1212
1213         // Operation accumulation variables.
1214         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1215         {
1216                 op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n";
1217                 op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n";
1218         }
1219
1220         // Loop, with expressions in it.
1221         op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1222         op << "\t{\n";
1223         {
1224                 const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1225                 for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
1226                 {
1227                         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1228                         {
1229                                 if (i > 0 || unrollNdx > 0)
1230                                         op << "\n";
1231                                 op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n";
1232                                 op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n";
1233                         }
1234                 }
1235         }
1236         op << "\t}\n";
1237         op << "\n";
1238
1239         // Result variable (sum of accumulation variables).
1240         op << "\t" << precision << " " << typeName << " res =";
1241         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1242                 op << (i > 0 ? " "+m_op : "") << " acc" << i << "b";
1243         op << ";\n";
1244
1245         // Convert to color.
1246         op << "\tmediump vec4 color = ";
1247         if (m_type == TYPE_FLOAT_VEC4)
1248                 op << "res";
1249         else
1250         {
1251                 int size = getDataTypeScalarSize(m_type);
1252                 op << "vec4(res";
1253
1254                 for (int i = size; i < 4; i++)
1255                         op << ", " << (i == 3 ? "1.0" : "0.0");
1256
1257                 op << ")";
1258         }
1259         op << ";\n";
1260         op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1261
1262         if (isVertexCase)
1263         {
1264                 vtx << "        gl_Position = a_position + u_zero*color;\n";
1265                 frag << "       o_color = v_color;\n";
1266         }
1267         else
1268         {
1269                 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1270                         vtx << "        v_in" << i << " = a_in" << i << ";\n";
1271         }
1272
1273         vtx << "}\n";
1274         frag << "}\n";
1275
1276         {
1277                 vector<AttribSpec> attributes;
1278                 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1279                         attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1280                                                                                         Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1281                                                                                         Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1282                                                                                         Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1283                                                                                         Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4)));
1284
1285                 {
1286                         string description = "This is the program with the ";
1287
1288                         description += programID == PROGRAM_WITH_SMALLER_LOOP   ? "smaller"
1289                                                  : programID == PROGRAM_WITH_BIGGER_LOOP        ? "bigger"
1290                                                  : DE_NULL;
1291
1292                         description += " loop.\n"
1293                                                    "Note: workload size for this program means the number of loop iterations.";
1294
1295                         return ProgramContext(vtx.str(), frag.str(), attributes, description);
1296                 }
1297         }
1298 }
1299
1300 vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const
1301 {
1302         vector<ProgramContext> progData;
1303         for (int i = 0; i < PROGRAM_LAST; i++)
1304                 progData.push_back(generateSingleProgramData((ProgramID)i));
1305         return progData;
1306 }
1307
1308 void BinaryOpCase::setGeneralUniforms (deUint32 program) const
1309 {
1310         const glw::Functions& gl = m_renderCtx.getFunctions();
1311         gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1312 }
1313
1314 void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
1315 {
1316         const glw::Functions& gl = m_renderCtx.getFunctions();
1317         gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
1318 }
1319
1320 float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
1321 {
1322         DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1323
1324         const int               baseNumOpsInsideLoop                            = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1325         const int               numOpsInsideLoopInSmallProgram          = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1326         const int               numOpsInsideLoopInBigProgram            = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1327         DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
1328         const int               opDiff                                                          = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1329         const float             programOperationCostDiff                        = perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];
1330
1331         return programOperationCostDiff / (float)opDiff;
1332 }
1333
1334 void BinaryOpCase::logSingleOperationCalculationInfo (void) const
1335 {
1336         const int                       baseNumOpsInsideLoop                    = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1337         const int                       numOpsInsideLoopInSmallProgram  = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1338         const int                       numOpsInsideLoopInBigProgram    = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1339         const int                       opDiff                                                  = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1340         const char* const       opName                                                  = m_op == "+" ? "addition"
1341                                                                                                                 : m_op == "-" ? "subtraction"
1342                                                                                                                 : m_op == "*" ? "multiplication"
1343                                                                                                                 : m_op == "/" ? "division"
1344                                                                                                                 : DE_NULL;
1345         DE_ASSERT(opName != DE_NULL);
1346
1347         m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more "
1348                                                                                    << opName << " operations in one loop iteration than the small program; "
1349                                                                                    << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
1350                                                                                    << TestLog::EndMessage;
1351 }
1352
1353 // Built-in function case.
1354 class FunctionCase : public OperatorPerformanceCase
1355 {
1356 public:
1357         enum
1358         {
1359                 MAX_PARAMS = 3
1360         };
1361
1362                                                 FunctionCase                    (Context&                                                       context,
1363                                                                                                  const char*                                            name,
1364                                                                                                  const char*                                            description,
1365                                                                                                  const char*                                            func,
1366                                                                                                  glu::DataType                                          returnType,
1367                                                                                                  const glu::DataType                            paramTypes[MAX_PARAMS],
1368                                                                                                  const Vec4&                                            attribute,
1369                                                                                                  int                                                            modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
1370                                                                                                  bool                                                           useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
1371                                                                                                  glu::Precision                                         precision,
1372                                                                                                  bool                                                           isVertex,
1373                                                                                                  const InitialCalibrationStorage&       initialCalibration);
1374
1375 protected:
1376         vector<ProgramContext>  generateProgramData                                     (void) const;
1377         void                                    setGeneralUniforms                                      (deUint32 program) const;
1378         void                                    setWorkloadSizeUniform                          (deUint32 program, int numOperations) const;
1379         float                                   computeSingleOperationTime                      (const vector<float>& perProgramOperationCosts) const;
1380         void                                    logSingleOperationCalculationInfo       (void) const;
1381
1382 private:
1383         enum ProgramID
1384         {
1385                 // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1386                 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1387                 PROGRAM_WITH_FUNCTION_CALLS = 0,
1388                 PROGRAM_WITHOUT_FUNCTION_CALLS,
1389
1390                 PROGRAM_LAST
1391         };
1392
1393         //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
1394         static string           sumExpr                                         (const string& aExpr, const string& bExpr, glu::DataType type);
1395         //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
1396         //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
1397         //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
1398         static string           incrementExpr                           (const string& baseExpr, glu::DataType type, bool divide);
1399
1400         ProgramContext          generateSingleProgramData       (ProgramID) const;
1401
1402         const string                    m_func;
1403         const glu::DataType             m_returnType;
1404         glu::DataType                   m_paramTypes[MAX_PARAMS];
1405         // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
1406         //               compile-time constant (2.0) is added. This is a quick and dirty way to deal with
1407         //               functions like clamp or smoothstep that require that a certain parameter is
1408         //               greater than a certain other parameter.
1409         const int                               m_modifyParamNdx;
1410         // \note m_useNearlyConstantInputs determines whether the inputs given to the function
1411         //               should increase (w.r.t m_attribute) only by very small amounts. This is relevant
1412         //               for functions like asin, which requires its inputs to be in a specific range.
1413         //               In practice, this affects whether expressions used to increment the input
1414         //               variables use division instead of multiplication; normally, multiplication is used,
1415         //               but it's hard to keep the increments very small that way, and division shouldn't
1416         //               be the default, since for many functions (probably not asin, luckily), division
1417         //               is too heavy and dominates time-wise.
1418         const bool                              m_useNearlyConstantInputs;
1419         const Vec4                              m_attribute;
1420         const glu::Precision    m_precision;
1421 };
1422
1423 FunctionCase::FunctionCase (Context&                                                    context,
1424                                                         const char*                                                     name,
1425                                                         const char*                                                     description,
1426                                                         const char*                                                     func,
1427                                                         glu::DataType                                           returnType,
1428                                                         const glu::DataType                                     paramTypes[MAX_PARAMS],
1429                                                         const Vec4&                                                     attribute,
1430                                                         int                                                                     modifyParamNdx,
1431                                                         bool                                                            useNearlyConstantInputs,
1432                                                         glu::Precision                                          precision,
1433                                                         bool                                                            isVertex,
1434                                                         const InitialCalibrationStorage&        initialCalibration)
1435         : OperatorPerformanceCase       (context.getTestContext(), context.getRenderContext(), name, description,
1436                                                                  isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1437         , m_func                                        (func)
1438         , m_returnType                          (returnType)
1439         , m_modifyParamNdx                      (modifyParamNdx)
1440         , m_useNearlyConstantInputs     (useNearlyConstantInputs)
1441         , m_attribute                           (attribute)
1442         , m_precision                           (precision)
1443 {
1444         for (int i = 0; i < MAX_PARAMS; i++)
1445                 m_paramTypes[i] = paramTypes[i];
1446 }
1447
1448 string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type)
1449 {
1450         if (glu::isDataTypeBoolOrBVec(type))
1451         {
1452                 if (type == glu::TYPE_BOOL)
1453                         return "(" + aExpr + " == " + bExpr + ")";
1454                 else
1455                         return "equal(" + aExpr + ", " + bExpr + ")";
1456         }
1457         else
1458                 return "(" + aExpr + " + " + bExpr + ")";
1459 }
1460
1461 string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide)
1462 {
1463         const string mulOrDiv = divide ? "/" : "*";
1464
1465         return glu::isDataTypeBoolOrBVec(type)  ? baseExpr
1466                  : glu::isDataTypeIntOrIVec(type)       ? "(" + baseExpr + mulOrDiv + "(i+1))"
1467                  :                                                                        "(" + baseExpr + mulOrDiv + "float(i+1))";
1468 }
1469
1470 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const
1471 {
1472         const bool                      isVertexCase                    = m_caseType == CASETYPE_VERTEX;
1473         const char* const       precision                               = glu::getPrecisionName(m_precision);
1474         const char* const       returnTypeName                  = getDataTypeName(m_returnType);
1475         const string            returnPrecisionMaybe    = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
1476         const char*                     inputPrecision                  = DE_NULL;
1477         const bool                      isMatrixReturn                  = isDataTypeMatrix(m_returnType);
1478         int                                     numParams                               = 0;
1479         const char*                     paramTypeNames[MAX_PARAMS];
1480         string                          paramPrecisionsMaybe[MAX_PARAMS];
1481
1482         for (int i = 0; i < MAX_PARAMS; i++)
1483         {
1484                 paramTypeNames[i]                       = getDataTypeName(m_paramTypes[i]);
1485                 paramPrecisionsMaybe[i]         = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
1486
1487                 if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
1488                         inputPrecision = "mediump";
1489
1490                 if (m_paramTypes[i] != TYPE_INVALID)
1491                         numParams = i+1;
1492         }
1493
1494         DE_ASSERT(numParams > 0);
1495
1496         if (inputPrecision == DE_NULL)
1497                 inputPrecision = precision;
1498
1499         int                                             numAttributes   = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
1500         std::ostringstream              vtx;
1501         std::ostringstream              frag;
1502         std::ostringstream&             op                              = isVertexCase ? vtx : frag;
1503
1504         vtx << "#version 300 es\n";
1505         frag << "#version 300 es\n"
1506                  << "layout (location = 0) out mediump vec4 o_color;\n";
1507
1508         // Attributes.
1509         vtx << "in highp vec4 a_position;\n";
1510         for (int i = 0; i < numAttributes; i++)
1511                 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1512
1513         if (isVertexCase)
1514         {
1515                 vtx << "out mediump vec4 v_color;\n";
1516                 frag << "in mediump vec4 v_color;\n";
1517         }
1518         else
1519         {
1520                 for (int i = 0; i < numAttributes; i++)
1521                 {
1522                         vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1523                         frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1524                 }
1525         }
1526
1527         op << "uniform mediump int u_numLoopIterations;\n";
1528         if (isVertexCase)
1529                 op << "uniform mediump float u_zero;\n";
1530
1531         for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1532                 op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n";
1533
1534         vtx << "\n";
1535         vtx << "void main()\n";
1536         vtx << "{\n";
1537
1538         if (!isVertexCase)
1539                 vtx << "\tgl_Position = a_position;\n";
1540
1541         frag << "\n";
1542         frag << "void main()\n";
1543         frag << "{\n";
1544
1545         // Function call input and return value accumulation variables.
1546         {
1547                 const char* const inPrefix = isVertexCase ? "a_" : "v_";
1548
1549                 for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1550                 {
1551                         for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1552                         {
1553                                 const glu::DataType             paramType       = m_paramTypes[paramNdx];
1554                                 const bool                              mustCast        = paramType != glu::TYPE_FLOAT_VEC4;
1555
1556                                 op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = ";
1557
1558                                 if (mustCast)
1559                                         op << paramTypeNames[paramNdx] << "(";
1560
1561                                 if (glu::isDataTypeMatrix(paramType))
1562                                 {
1563                                         static const char* const        swizzles[3]             = { "x", "xy", "xyz" };
1564                                         const int                                       numRows                 = glu::getDataTypeMatrixNumRows(paramType);
1565                                         const int                                       numCols                 = glu::getDataTypeMatrixNumColumns(paramType);
1566                                         const string                            swizzle                 = numRows < 4 ? string() + "." + swizzles[numRows-1] : "";
1567
1568                                         for (int i = 0; i < numCols; i++)
1569                                                 op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle;
1570                                 }
1571                                 else
1572                                 {
1573                                         op << inPrefix << "in" << calcNdx+paramNdx;
1574
1575                                         if (paramNdx == m_modifyParamNdx)
1576                                         {
1577                                                 DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
1578                                                 op << " + 2.0";
1579                                         }
1580                                 }
1581
1582                                 if (mustCast)
1583                                         op << ")";
1584
1585                                 op << ";\n";
1586                         }
1587
1588                         op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n";
1589                 }
1590         }
1591
1592         // Loop with expressions in it.
1593         op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1594         op << "\t{\n";
1595         for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1596         {
1597                 if (calcNdx > 0)
1598                         op << "\n";
1599
1600                 op << "\t\t{\n";
1601
1602                 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1603                 {
1604                         const string inputName  = "in" + de::toString(calcNdx) + (char)('a'+inputNdx);
1605                         const string incName    = string() + "u_inc" + (char)('A'+inputNdx);
1606                         const string incExpr    = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
1607
1608                         op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
1609                 }
1610
1611                 op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
1612
1613                 if (programID == PROGRAM_WITH_FUNCTION_CALLS)
1614                 {
1615                         op << m_func << "(";
1616
1617                         for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1618                         {
1619                                 if (paramNdx > 0)
1620                                         op << ", ";
1621
1622                                 op << "in" << calcNdx << (char)('a'+paramNdx);
1623                         }
1624
1625                         op << ")";
1626                 }
1627                 else
1628                 {
1629                         DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
1630                         op << returnTypeName << "(1)";
1631                 }
1632
1633                 op << ";\n";
1634
1635                 {
1636                         const string resName    = "res" + de::toString(calcNdx);
1637                         const string evalName   = "eval" + de::toString(calcNdx);
1638                         const string incExpr    = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
1639
1640                         op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
1641                 }
1642
1643                 op << "\t\t}\n";
1644         }
1645         op << "\t}\n";
1646         op << "\n";
1647
1648         // Result variables.
1649         for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1650         {
1651                 op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = ";
1652                 {
1653                         string expr = string() + "in0" + (char)('a'+inputNdx);
1654                         for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1655                                 expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]);
1656                         op << expr;
1657                 }
1658                 op << ";\n";
1659         }
1660
1661         op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
1662         {
1663                 string expr = "res0";
1664                 for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1665                         expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
1666                 op << expr;
1667         }
1668         op << ";\n";
1669
1670         {
1671                 glu::DataType finalResultDataType = glu::TYPE_LAST;
1672
1673                 if (glu::isDataTypeMatrix(m_returnType))
1674                 {
1675                         finalResultDataType = m_returnType;
1676
1677                         op << "\t" << precision << " " << returnTypeName << " finalRes = ";
1678
1679                         for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1680                         {
1681                                 DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
1682                                 op << "sumIn" << (char)('A'+inputNdx) << " + ";
1683                         }
1684                         op << "sumRes;\n";
1685                 }
1686                 else
1687                 {
1688                         int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
1689                         for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1690                                 numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
1691
1692                         finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
1693
1694                         {
1695                                 const string finalResType = glu::getDataTypeName(finalResultDataType);
1696                                 op << "\t" << precision << " " << finalResType << " finalRes = ";
1697                                 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1698                                         op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + ";
1699                                 op << finalResType << "(sumRes);\n";
1700                         }
1701                 }
1702
1703                 // Convert to color.
1704                 op << "\tmediump vec4 color = ";
1705                 if (finalResultDataType == TYPE_FLOAT_VEC4)
1706                         op << "finalRes";
1707                 else
1708                 {
1709                         int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType);
1710
1711                         op << "vec4(";
1712
1713                         if (isMatrixReturn)
1714                         {
1715                                 for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
1716                                 {
1717                                         if (i > 0)
1718                                                 op << " + ";
1719                                         op << "finalRes[" << i << "]";
1720                                 }
1721                         }
1722                         else
1723                                 op << "finalRes";
1724
1725                         for (int i = size; i < 4; i++)
1726                                 op << ", " << (i == 3 ? "1.0" : "0.0");
1727
1728                         op << ")";
1729                 }
1730                 op << ";\n";
1731                 op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1732
1733                 if (isVertexCase)
1734                 {
1735                         vtx << "        gl_Position = a_position + u_zero*color;\n";
1736                         frag << "       o_color = v_color;\n";
1737                 }
1738                 else
1739                 {
1740                         for (int i = 0; i < numAttributes; i++)
1741                                 vtx << "        v_in" << i << " = a_in" << i << ";\n";
1742                 }
1743
1744                 vtx << "}\n";
1745                 frag << "}\n";
1746         }
1747
1748         {
1749                 vector<AttribSpec> attributes;
1750                 for (int i = 0; i < numAttributes; i++)
1751                         attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1752                                                                                         m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1753                                                                                         m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4),
1754                                                                                         m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4),
1755                                                                                         m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4)));
1756
1757                 {
1758                         string description = "This is the program ";
1759
1760                         description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS      ? "without"
1761                                                  : programID == PROGRAM_WITH_FUNCTION_CALLS             ? "with"
1762                                                  : DE_NULL;
1763
1764                         description += " '" + m_func + "' function calls.\n"
1765                                                    "Note: workload size for this program means the number of loop iterations.";
1766
1767                         return ProgramContext(vtx.str(), frag.str(), attributes, description);
1768                 }
1769         }
1770 }
1771
1772 vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const
1773 {
1774         vector<ProgramContext> progData;
1775         for (int i = 0; i < PROGRAM_LAST; i++)
1776                 progData.push_back(generateSingleProgramData((ProgramID)i));
1777         return progData;
1778 }
1779
1780 void FunctionCase::setGeneralUniforms (deUint32 program) const
1781 {
1782         const glw::Functions& gl = m_renderCtx.getFunctions();
1783
1784         gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1785
1786         for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
1787         {
1788                 if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
1789                 {
1790                         const glu::DataType             paramType       = m_paramTypes[paramNdx];
1791                         const int                               scalarSize      = glu::getDataTypeScalarSize(paramType);
1792                         const int                               location        = gl.getUniformLocation(program, (string() + "u_inc" + (char)('A'+paramNdx)).c_str());
1793
1794                         if (glu::isDataTypeFloatOrVec(paramType))
1795                         {
1796                                 float values[4];
1797                                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1798                                         values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary small values.
1799                                 uniformNfv(gl, scalarSize, location, 1, &values[0]);
1800                         }
1801                         else if (glu::isDataTypeIntOrIVec(paramType))
1802                         {
1803                                 int values[4];
1804                                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1805                                         values[i] = paramNdx*100 + i; // Arbitrary values.
1806                                 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1807                         }
1808                         else if (glu::isDataTypeBoolOrBVec(paramType))
1809                         {
1810                                 int values[4];
1811                                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1812                                         values[i] = (paramNdx >> i) & 1; // Arbitrary values.
1813                                 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1814                         }
1815                         else if (glu::isDataTypeMatrix(paramType))
1816                         {
1817                                 const int size = glu::getDataTypeMatrixNumRows(paramType);
1818                                 DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType));
1819                                 float values[4*4];
1820                                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1821                                         values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary values.
1822                                 uniformMatrixNfv(gl, size, location, 1, &values[0]);
1823                         }
1824                         else
1825                                 DE_ASSERT(false);
1826                 }
1827         }
1828 }
1829
1830 void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
1831 {
1832         const glw::Functions&   gl              = m_renderCtx.getFunctions();
1833         const int                               loc             = gl.getUniformLocation(program, "u_numLoopIterations");
1834
1835         gl.uniform1i(loc, numLoopIterations);
1836 }
1837
1838 float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
1839 {
1840         DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1841         const int               numFunctionCalls                        = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1842         const float             programOperationCostDiff        = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
1843
1844         return programOperationCostDiff / (float)numFunctionCalls;
1845 }
1846
1847 void FunctionCase::logSingleOperationCalculationInfo (void) const
1848 {
1849         const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1850
1851         m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
1852                                                                                    << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
1853                                                                                    << "cost of one operation is calculated as "
1854                                                                                    << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage;
1855 }
1856
1857 } // anonymous
1858
1859 ShaderOperatorTests::ShaderOperatorTests (Context& context)
1860         : TestCaseGroup(context, "operator", "Operator Performance Tests")
1861 {
1862 }
1863
1864 ShaderOperatorTests::~ShaderOperatorTests (void)
1865 {
1866 }
1867
1868 void ShaderOperatorTests::init (void)
1869 {
1870         // Binary operator cases
1871
1872         static const DataType binaryOpTypes[] =
1873         {
1874                 TYPE_FLOAT,
1875                 TYPE_FLOAT_VEC2,
1876                 TYPE_FLOAT_VEC3,
1877                 TYPE_FLOAT_VEC4,
1878                 TYPE_INT,
1879                 TYPE_INT_VEC2,
1880                 TYPE_INT_VEC3,
1881                 TYPE_INT_VEC4,
1882         };
1883         static const Precision precisions[] =
1884         {
1885                 PRECISION_LOWP,
1886                 PRECISION_MEDIUMP,
1887                 PRECISION_HIGHP
1888         };
1889         static const struct
1890         {
1891                 const char*             name;
1892                 const char*             op;
1893                 bool                    swizzle;
1894         } binaryOps[] =
1895         {
1896                 { "add",                "+",            false   },
1897                 { "sub",                "-",            true    },
1898                 { "mul",                "*",            false   },
1899                 { "div",                "/",            true    }
1900         };
1901
1902         tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
1903         addChild(binaryOpsGroup);
1904
1905         for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
1906         {
1907                 tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
1908                 binaryOpsGroup->addChild(opGroup);
1909
1910                 for (int isFrag = 0; isFrag <= 1; isFrag++)
1911                 {
1912                         const BinaryOpCase::InitialCalibrationStorage   shaderGroupCalibrationStorage   (new BinaryOpCase::InitialCalibration);
1913                         const bool                                                                              isVertex                                                = isFrag == 0;
1914                         tcu::TestCaseGroup* const                                               shaderGroup                                             = new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
1915                         opGroup->addChild(shaderGroup);
1916
1917                         for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
1918                         {
1919                                 for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
1920                                 {
1921                                         const DataType          type                    = binaryOpTypes[typeNdx];
1922                                         const Precision         precision               = precisions[precNdx];
1923                                         const char* const       op                              = binaryOps[opNdx].op;
1924                                         const bool                      useSwizzle              = binaryOps[opNdx].swizzle;
1925                                         std::ostringstream      name;
1926
1927                                         name << getPrecisionName(precision) << "_" << getDataTypeName(type);
1928
1929                                         shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage));
1930                                 }
1931                         }
1932                 }
1933         }
1934
1935         // Built-in function cases.
1936
1937         // Non-specific (i.e. includes gentypes) parameter types for the functions.
1938         enum ValueType
1939         {
1940                 VALUE_NONE                      = 0,
1941                 VALUE_FLOAT                     = (1<<0),       // float scalar
1942                 VALUE_FLOAT_VEC         = (1<<1),       // float vector
1943                 VALUE_FLOAT_VEC34       = (1<<2),       // float vector of size 3 or 4
1944                 VALUE_FLOAT_GENTYPE     = (1<<3),       // float scalar/vector
1945                 VALUE_VEC3                      = (1<<4),       // vec3 only
1946                 VALUE_VEC4                      = (1<<5),       // vec4 only
1947                 VALUE_MATRIX            = (1<<6),       // matrix
1948                 VALUE_BOOL                      = (1<<7),       // boolean scalar
1949                 VALUE_BOOL_VEC          = (1<<8),       // boolean vector
1950                 VALUE_BOOL_VEC4         = (1<<9),       // bvec4 only
1951                 VALUE_BOOL_GENTYPE      = (1<<10),      // boolean scalar/vector
1952                 VALUE_INT                       = (1<<11),      // int scalar
1953                 VALUE_INT_VEC           = (1<<12),      // int vector
1954                 VALUE_INT_VEC4          = (1<<13),      // ivec4 only
1955                 VALUE_INT_GENTYPE       = (1<<14),      // int scalar/vector
1956
1957                 // Shorthands.
1958                 N                               = VALUE_NONE,
1959                 F                               = VALUE_FLOAT,
1960                 FV                              = VALUE_FLOAT_VEC,
1961                 VL                              = VALUE_FLOAT_VEC34, // L for "large"
1962                 GT                              = VALUE_FLOAT_GENTYPE,
1963                 V3                              = VALUE_VEC3,
1964                 V4                              = VALUE_VEC4,
1965                 M                               = VALUE_MATRIX,
1966                 B                               = VALUE_BOOL,
1967                 BV                              = VALUE_BOOL_VEC,
1968                 B4                              = VALUE_BOOL_VEC4,
1969                 BGT                             = VALUE_BOOL_GENTYPE,
1970                 I                               = VALUE_INT,
1971                 IV                              = VALUE_INT_VEC,
1972                 I4                              = VALUE_INT_VEC4,
1973                 IGT                             = VALUE_INT_GENTYPE,
1974
1975                 VALUE_ANY_FLOAT                 = VALUE_FLOAT           |       VALUE_FLOAT_VEC         |       VALUE_FLOAT_GENTYPE             | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
1976                 VALUE_ANY_INT                   = VALUE_INT                     |       VALUE_INT_VEC           |       VALUE_INT_GENTYPE               | VALUE_INT_VEC4,
1977                 VALUE_ANY_BOOL                  = VALUE_BOOL            |       VALUE_BOOL_VEC          |       VALUE_BOOL_GENTYPE              | VALUE_BOOL_VEC4,
1978
1979                 VALUE_ANY_GENTYPE               = VALUE_FLOAT_VEC       |       VALUE_FLOAT_GENTYPE     |       VALUE_FLOAT_VEC34       |
1980                                                                   VALUE_BOOL_VEC        |       VALUE_BOOL_GENTYPE      |
1981                                                                   VALUE_INT_VEC         |       VALUE_INT_GENTYPE       |
1982                                                                   VALUE_MATRIX
1983         };
1984         enum PrecisionMask
1985         {
1986                 PRECMASK_NA                             = 0,                                            //!< Precision not applicable (booleans)
1987                 PRECMASK_LOWP                   = (1<<PRECISION_LOWP),
1988                 PRECMASK_MEDIUMP                = (1<<PRECISION_MEDIUMP),
1989                 PRECMASK_HIGHP                  = (1<<PRECISION_HIGHP),
1990
1991                 PRECMASK_MEDIUMP_HIGHP  = (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP),
1992                 PRECMASK_ALL                    = (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP)
1993         };
1994
1995         static const DataType floatTypes[] =
1996         {
1997                 TYPE_FLOAT,
1998                 TYPE_FLOAT_VEC2,
1999                 TYPE_FLOAT_VEC3,
2000                 TYPE_FLOAT_VEC4
2001         };
2002         static const DataType intTypes[] =
2003         {
2004                 TYPE_INT,
2005                 TYPE_INT_VEC2,
2006                 TYPE_INT_VEC3,
2007                 TYPE_INT_VEC4
2008         };
2009         static const DataType boolTypes[] =
2010         {
2011                 TYPE_BOOL,
2012                 TYPE_BOOL_VEC2,
2013                 TYPE_BOOL_VEC3,
2014                 TYPE_BOOL_VEC4
2015         };
2016         static const DataType matrixTypes[] =
2017         {
2018                 TYPE_FLOAT_MAT2,
2019                 TYPE_FLOAT_MAT3,
2020                 TYPE_FLOAT_MAT4
2021         };
2022
2023         tcu::TestCaseGroup* const angleAndTrigonometryGroup             = new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry",   "Built-In Angle and Trigonometry Function Performance Tests");
2024         tcu::TestCaseGroup* const exponentialGroup                              = new tcu::TestCaseGroup(m_testCtx, "exponential",                              "Built-In Exponential Function Performance Tests");
2025         tcu::TestCaseGroup* const commonFunctionsGroup                  = new tcu::TestCaseGroup(m_testCtx, "common_functions",                 "Built-In Common Function Performance Tests");
2026         tcu::TestCaseGroup* const geometricFunctionsGroup               = new tcu::TestCaseGroup(m_testCtx, "geometric",                                "Built-In Geometric Function Performance Tests");
2027         tcu::TestCaseGroup* const matrixFunctionsGroup                  = new tcu::TestCaseGroup(m_testCtx, "matrix",                                   "Built-In Matrix Function Performance Tests");
2028         tcu::TestCaseGroup* const floatCompareGroup                             = new tcu::TestCaseGroup(m_testCtx, "float_compare",                    "Built-In Floating Point Comparison Function Performance Tests");
2029         tcu::TestCaseGroup* const intCompareGroup                               = new tcu::TestCaseGroup(m_testCtx, "int_compare",                              "Built-In Integer Comparison Function Performance Tests");
2030         tcu::TestCaseGroup* const boolCompareGroup                              = new tcu::TestCaseGroup(m_testCtx, "bool_compare",                             "Built-In Boolean Comparison Function Performance Tests");
2031
2032         addChild(angleAndTrigonometryGroup);
2033         addChild(exponentialGroup);
2034         addChild(commonFunctionsGroup);
2035         addChild(geometricFunctionsGroup);
2036         addChild(matrixFunctionsGroup);
2037         addChild(floatCompareGroup);
2038         addChild(intCompareGroup);
2039         addChild(boolCompareGroup);
2040
2041         // Some attributes to be used as parameters for the functions.
2042         const Vec4 attrPos              = Vec4( 2.3f,  1.9f,  0.8f,  0.7f);
2043         const Vec4 attrNegPos   = Vec4(-1.3f,  2.5f, -3.5f,      4.3f);
2044         const Vec4 attrSmall    = Vec4(-0.9f,  0.8f, -0.4f,      0.2f);
2045         const Vec4 attrBig              = Vec4( 1.3f,  2.4f,  3.0f,      4.0f);
2046
2047         // \todo The following functions and variants are missing, and should be added in the future:
2048         //               - modf (has an output parameter, not currently handled by test code)
2049         //               - functions with uint/uvec* return or parameter types
2050         //               - non-matrix <-> matrix functions (outerProduct etc.)
2051         // \note Remember to update test spec when these are added.
2052
2053         // Function name, return type and parameter type information; also, what attribute should be used in the test.
2054         // \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
2055         // \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
2056         static const struct
2057         {
2058                 tcu::TestCaseGroup*                                     parentGroup;
2059                 const char*                                                     groupName;
2060                 const char*                                                     func;
2061                 const ValueType                                         types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
2062                 const Vec4&                                                     attribute;
2063                 int                                                                     modifyParamNdx;
2064                 bool                                                            useNearlyConstantInputs;
2065                 bool                                                            booleanCase;
2066                 PrecisionMask                                           precMask;
2067         } functionCaseGroups[] =
2068         {
2069                 { angleAndTrigonometryGroup,    "radians",                      "radians",                      { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2070                 { angleAndTrigonometryGroup,    "degrees",                      "degrees",                      { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2071                 { angleAndTrigonometryGroup,    "sin",                          "sin",                          { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2072                 { angleAndTrigonometryGroup,    "cos",                          "cos",                          { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2073                 { angleAndTrigonometryGroup,    "tan",                          "tan",                          { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2074                 { angleAndTrigonometryGroup,    "asin",                         "asin",                         { F,  F,  N,  N  }, attrSmall,          -1, true,       false,  PRECMASK_ALL                    },
2075                 { angleAndTrigonometryGroup,    "acos",                         "acos",                         { F,  F,  N,  N  }, attrSmall,          -1, true,       false,  PRECMASK_ALL                    },
2076                 { angleAndTrigonometryGroup,    "atan2",                        "atan",                         { F,  F,  F,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2077                 { angleAndTrigonometryGroup,    "atan",                         "atan",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2078                 { angleAndTrigonometryGroup,    "sinh",                         "sinh",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2079                 { angleAndTrigonometryGroup,    "cosh",                         "cosh",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2080                 { angleAndTrigonometryGroup,    "tanh",                         "tanh",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2081                 { angleAndTrigonometryGroup,    "asinh",                        "asinh",                        { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2082                 { angleAndTrigonometryGroup,    "acosh",                        "acosh",                        { F,  F,  N,  N  }, attrBig,            -1, false,      false,  PRECMASK_ALL                    },
2083                 { angleAndTrigonometryGroup,    "atanh",                        "atanh",                        { F,  F,  N,  N  }, attrSmall,          -1, true,       false,  PRECMASK_ALL                    },
2084
2085                 { exponentialGroup,                             "pow",                          "pow",                          { F,  F,  F,  N  }, attrPos,            -1, false,      false,  PRECMASK_ALL                    },
2086                 { exponentialGroup,                             "exp",                          "exp",                          { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2087                 { exponentialGroup,                             "log",                          "log",                          { F,  F,  N,  N  }, attrPos,            -1, false,      false,  PRECMASK_ALL                    },
2088                 { exponentialGroup,                             "exp2",                         "exp2",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2089                 { exponentialGroup,                             "log2",                         "log2",                         { F,  F,  N,  N  }, attrPos,            -1, false,      false,  PRECMASK_ALL                    },
2090                 { exponentialGroup,                             "sqrt",                         "sqrt",                         { F,  F,  N,  N  }, attrPos,            -1, false,      false,  PRECMASK_ALL                    },
2091                 { exponentialGroup,                             "inversesqrt",          "inversesqrt",          { F,  F,  N,  N  }, attrPos,            -1, false,      false,  PRECMASK_ALL                    },
2092
2093                 { commonFunctionsGroup,                 "abs",                          "abs",                          { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2094                 { commonFunctionsGroup,                 "abs",                          "abs",                          { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2095                 { commonFunctionsGroup,                 "sign",                         "sign",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2096                 { commonFunctionsGroup,                 "sign",                         "sign",                         { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2097                 { commonFunctionsGroup,                 "floor",                        "floor",                        { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2098                 { commonFunctionsGroup,                 "floor",                        "floor",                        { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2099                 { commonFunctionsGroup,                 "trunc",                        "trunc",                        { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2100                 { commonFunctionsGroup,                 "trunc",                        "trunc",                        { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2101                 { commonFunctionsGroup,                 "round",                        "round",                        { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2102                 { commonFunctionsGroup,                 "round",                        "round",                        { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2103                 { commonFunctionsGroup,                 "roundEven",            "roundEven",            { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2104                 { commonFunctionsGroup,                 "roundEven",            "roundEven",            { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2105                 { commonFunctionsGroup,                 "ceil",                         "ceil",                         { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2106                 { commonFunctionsGroup,                 "ceil",                         "ceil",                         { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2107                 { commonFunctionsGroup,                 "fract",                        "fract",                        { F,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2108                 { commonFunctionsGroup,                 "fract",                        "fract",                        { V4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2109                 { commonFunctionsGroup,                 "mod",                          "mod",                          { GT, GT, GT, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2110                 { commonFunctionsGroup,                 "min",                          "min",                          { F,  F,  F,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2111                 { commonFunctionsGroup,                 "min",                          "min",                          { V4, V4, V4, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2112                 { commonFunctionsGroup,                 "max",                          "max",                          { F,  F,  F,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2113                 { commonFunctionsGroup,                 "max",                          "max",                          { V4, V4, V4, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2114                 { commonFunctionsGroup,                 "clamp",                        "clamp",                        { F,  F,  F,  F  }, attrSmall,           2, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2115                 { commonFunctionsGroup,                 "clamp",                        "clamp",                        { V4, V4, V4, V4 }, attrSmall,           2, false,      false,  PRECMASK_ALL                    },
2116                 { commonFunctionsGroup,                 "mix",                          "mix",                          { F,  F,  F,  F  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2117                 { commonFunctionsGroup,                 "mix",                          "mix",                          { V4, V4, V4, V4 }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2118                 { commonFunctionsGroup,                 "mix",                          "mix",                          { F,  F,  F,  B  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2119                 { commonFunctionsGroup,                 "mix",                          "mix",                          { V4, V4, V4, B4 }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2120                 { commonFunctionsGroup,                 "step",                         "step",                         { F,  F,  F,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2121                 { commonFunctionsGroup,                 "step",                         "step",                         { V4, V4, V4, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2122                 { commonFunctionsGroup,                 "smoothstep",           "smoothstep",           { F,  F,  F,  F  }, attrSmall,           1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2123                 { commonFunctionsGroup,                 "smoothstep",           "smoothstep",           { V4, V4, V4, V4 }, attrSmall,           1, false,      false,  PRECMASK_ALL                    },
2124                 { commonFunctionsGroup,                 "isnan",                        "isnan",                        { B,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2125                 { commonFunctionsGroup,                 "isnan",                        "isnan",                        { B4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2126                 { commonFunctionsGroup,                 "isinf",                        "isinf",                        { B,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2127                 { commonFunctionsGroup,                 "isinf",                        "isinf",                        { B4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2128                 { commonFunctionsGroup,                 "floatBitsToInt",       "floatBitsToInt",       { I,  F,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2129                 { commonFunctionsGroup,                 "floatBitsToInt",       "floatBitsToInt",       { I4, V4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2130                 { commonFunctionsGroup,                 "intBitsToFloat",       "intBitsToFloat",       { F,  I,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_MEDIUMP_HIGHP  },
2131                 { commonFunctionsGroup,                 "intBitsToFloat",       "intBitsToFloat",       { V4, I4, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2132
2133                 { geometricFunctionsGroup,              "length",                       "length",                       { F,  VL, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2134                 { geometricFunctionsGroup,              "distance",                     "distance",                     { F,  VL, VL, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2135                 { geometricFunctionsGroup,              "dot",                          "dot",                          { F,  VL, VL, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2136                 { geometricFunctionsGroup,              "cross",                        "cross",                        { V3, V3, V3, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2137                 { geometricFunctionsGroup,              "normalize",            "normalize",            { VL, VL, N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2138                 { geometricFunctionsGroup,              "faceforward",          "faceforward",          { VL, VL, VL, VL }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2139                 { geometricFunctionsGroup,              "reflect",                      "reflect",                      { VL, VL, VL, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2140                 { geometricFunctionsGroup,              "refract",                      "refract",                      { VL, VL, VL, F  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2141
2142                 { matrixFunctionsGroup,                 "matrixCompMult",       "matrixCompMult",       { M,  M,  M,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2143                 { matrixFunctionsGroup,                 "transpose",            "transpose",            { M,  M,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2144                 { matrixFunctionsGroup,                 "inverse",                      "inverse",                      { M,  M,  N,  N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2145
2146                 { floatCompareGroup,                    "lessThan",                     "lessThan",                     { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2147                 { floatCompareGroup,                    "lessThanEqual",        "lessThanEqual",        { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2148                 { floatCompareGroup,                    "greaterThan",          "greaterThan",          { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2149                 { floatCompareGroup,                    "greaterThanEqual",     "greaterThanEqual",     { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2150                 { floatCompareGroup,                    "equal",                        "equal",                        { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2151                 { floatCompareGroup,                    "notEqual",                     "notEqual",                     { BV, FV, FV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2152
2153                 { intCompareGroup,                              "lessThan",                     "lessThan",                     { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2154                 { intCompareGroup,                              "lessThanEqual",        "lessThanEqual",        { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2155                 { intCompareGroup,                              "greaterThan",          "greaterThan",          { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2156                 { intCompareGroup,                              "greaterThanEqual",     "greaterThanEqual",     { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2157                 { intCompareGroup,                              "equal",                        "equal",                        { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2158                 { intCompareGroup,                              "notEqual",                     "notEqual",                     { BV, IV, IV, N  }, attrNegPos,         -1, false,      false,  PRECMASK_ALL                    },
2159
2160                 { boolCompareGroup,                             "equal",                        "equal",                        { BV, BV, BV, N  }, attrNegPos,         -1, false,      true,   PRECMASK_MEDIUMP                },
2161                 { boolCompareGroup,                             "notEqual",                     "notEqual",                     { BV, BV, BV, N  }, attrNegPos,         -1, false,      true,   PRECMASK_MEDIUMP                },
2162                 { boolCompareGroup,                             "any",                          "any",                          { B,  BV, N,  N  }, attrNegPos,         -1, false,      true,   PRECMASK_MEDIUMP                },
2163                 { boolCompareGroup,                             "all",                          "all",                          { B,  BV, N,  N  }, attrNegPos,         -1, false,      true,   PRECMASK_MEDIUMP                },
2164                 { boolCompareGroup,                             "not",                          "not",                          { BV, BV, N,  N  }, attrNegPos,         -1, false,      true,   PRECMASK_MEDIUMP                }
2165         };
2166
2167         // vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
2168         // \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
2169         tcu::TestCaseGroup*                                                     vertexSubGroup          = DE_NULL;
2170         tcu::TestCaseGroup*                                                     fragmentSubGroup        = DE_NULL;
2171         FunctionCase::InitialCalibrationStorage         vertexSubGroupCalibrationStorage;
2172         FunctionCase::InitialCalibrationStorage         fragmentSubGroupCalibrationStorage;
2173         for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
2174         {
2175                 tcu::TestCaseGroup* const       parentGroup                                     = functionCaseGroups[funcNdx].parentGroup;
2176                 const char* const                       groupName                                       = functionCaseGroups[funcNdx].groupName;
2177                 const char* const                       groupFunc                                       = functionCaseGroups[funcNdx].func;
2178                 const ValueType* const          funcTypes                                       = functionCaseGroups[funcNdx].types;
2179                 const Vec4&                                     groupAttribute                          = functionCaseGroups[funcNdx].attribute;
2180                 const int                                       modifyParamNdx                          = functionCaseGroups[funcNdx].modifyParamNdx;
2181                 const bool                                      useNearlyConstantInputs         = functionCaseGroups[funcNdx].useNearlyConstantInputs;
2182                 const bool                                      booleanCase                                     = functionCaseGroups[funcNdx].booleanCase;
2183                 const PrecisionMask                     precMask                                        = functionCaseGroups[funcNdx].precMask;
2184
2185                 // If this is a new function and not just a different version of the previously defined function, create a new group.
2186                 if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName)
2187                 {
2188                         tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
2189                         functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);
2190
2191                         vertexSubGroup          = new tcu::TestCaseGroup(m_testCtx, "vertex", "");
2192                         fragmentSubGroup        = new tcu::TestCaseGroup(m_testCtx, "fragment", "");
2193
2194                         funcGroup->addChild(vertexSubGroup);
2195                         funcGroup->addChild(fragmentSubGroup);
2196
2197                         vertexSubGroupCalibrationStorage        = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2198                         fragmentSubGroupCalibrationStorage      = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2199                 }
2200
2201                 DE_ASSERT(vertexSubGroup != DE_NULL);
2202                 DE_ASSERT(fragmentSubGroup != DE_NULL);
2203
2204                 // Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
2205                 int genTypeFirstSize    = 1;
2206                 int genTypeLastSize             = 1;
2207
2208                 // Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
2209                 // \note Assumes that all gentypes in a signature share the same size range, e.g. no "genType func (vec param)"
2210                 for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
2211                 {
2212                         switch (funcTypes[i])
2213                         {
2214                                 case VALUE_FLOAT_VEC:
2215                                 case VALUE_BOOL_VEC:
2216                                 case VALUE_INT_VEC:                     // \note Fall-through.
2217                                         genTypeFirstSize = 2;
2218                                         genTypeLastSize = 4;
2219                                         break;
2220                                 case VALUE_FLOAT_VEC34:
2221                                         genTypeFirstSize = 3;
2222                                         genTypeLastSize = 4;
2223                                         break;
2224                                 case VALUE_FLOAT_GENTYPE:
2225                                 case VALUE_BOOL_GENTYPE:
2226                                 case VALUE_INT_GENTYPE:         // \note Fall-through.
2227                                         genTypeFirstSize = 1;
2228                                         genTypeLastSize = 4;
2229                                         break;
2230                                 case VALUE_MATRIX:
2231                                         genTypeFirstSize = 2;
2232                                         genTypeLastSize = 4;
2233                                         break;
2234                                 // If none of the above, keep looping.
2235                                 default:
2236                                         break;
2237                         }
2238                 }
2239
2240                 // Create a case for each possible size of the gentype.
2241                 for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
2242                 {
2243                         // Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
2244                         DataType types[FunctionCase::MAX_PARAMS + 1];
2245                         for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
2246                         {
2247                                 if (funcTypes[i] == VALUE_NONE)
2248                                         types[i] = TYPE_INVALID;
2249                                 else
2250                                 {
2251                                         int isFloat     = funcTypes[i] & VALUE_ANY_FLOAT;
2252                                         int isBool      = funcTypes[i] & VALUE_ANY_BOOL;
2253                                         int isInt       = funcTypes[i] & VALUE_ANY_INT;
2254                                         int isMat       = funcTypes[i] == VALUE_MATRIX;
2255                                         int inSize      = (funcTypes[i] & VALUE_ANY_GENTYPE)    ? curSize
2256                                                                 : funcTypes[i] == VALUE_VEC3                    ? 3
2257                                                                 : funcTypes[i] == VALUE_VEC4                    ? 4
2258                                                                 : funcTypes[i] == VALUE_BOOL_VEC4               ? 4
2259                                                                 : funcTypes[i] == VALUE_INT_VEC4                ? 4
2260                                                                 : 1;
2261                                         int                     typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.
2262
2263                                         types[i]        = isFloat       ? floatTypes[typeArrayNdx]
2264                                                                 : isBool        ? boolTypes[typeArrayNdx]
2265                                                                 : isInt         ? intTypes[typeArrayNdx]
2266                                                                 : isMat         ? matrixTypes[typeArrayNdx]
2267                                                                 : TYPE_LAST;
2268                                 }
2269
2270                                 DE_ASSERT(types[i] != TYPE_LAST);
2271                         }
2272
2273                         // Array for just the parameter types.
2274                         DataType paramTypes[FunctionCase::MAX_PARAMS];
2275                         for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
2276                                 paramTypes[i] = types[i+1];
2277
2278                         for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
2279                         {
2280                                 if ((precMask & (1 << prec)) == 0)
2281                                         continue;
2282
2283                                 const string            precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
2284                                 std::ostringstream      caseName;
2285
2286                                 caseName << precisionPrefix;
2287
2288                                 // Write the name of each distinct parameter data type into the test case name.
2289                                 for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
2290                                 {
2291                                         if (i == 1 || types[i] != types[i-1])
2292                                         {
2293                                                 if (i > 1)
2294                                                         caseName << "_";
2295
2296                                                 caseName << getDataTypeName(types[i]);
2297                                         }
2298                                 }
2299
2300                                 for (int fragI = 0; fragI <= 1; fragI++)
2301                                 {
2302                                         const bool                                      vert    = fragI == 0;
2303                                         tcu::TestCaseGroup* const       group   = vert ? vertexSubGroup : fragmentSubGroup;
2304                                         group->addChild (new FunctionCase(m_context,
2305                                                                                                           caseName.str().c_str(), "",
2306                                                                                                           groupFunc,
2307                                                                                                           types[0], paramTypes,
2308                                                                                                           groupAttribute, modifyParamNdx, useNearlyConstantInputs,
2309                                                                                                           (Precision)prec, vert,
2310                                                                                                           vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
2311                                 }
2312                         }
2313                 }
2314         }
2315 }
2316
2317 } // Performance
2318 } // gles3
2319 } // deqp