Merge "Change GL_MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS minmax to 2048" into nougat...
[platform/upstream/VK-GL-CTS.git] / modules / gles3 / performance / es3pDepthTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Depth buffer performance tests.
22  *//*--------------------------------------------------------------------*/
23
24 #include "es3pDepthTests.hpp"
25
26 #include "glsCalibration.hpp"
27
28 #include "gluShaderProgram.hpp"
29 #include "gluObjectWrapper.hpp"
30 #include "gluPixelTransfer.hpp"
31
32 #include "glwFunctions.hpp"
33 #include "glwEnums.hpp"
34
35 #include "tcuTestLog.hpp"
36 #include "tcuStringTemplate.hpp"
37 #include "tcuCPUWarmup.hpp"
38 #include "tcuCommandLine.hpp"
39 #include "tcuResultCollector.hpp"
40
41 #include "deClock.h"
42 #include "deString.h"
43 #include "deMath.h"
44 #include "deStringUtil.hpp"
45 #include "deRandom.hpp"
46 #include "deUniquePtr.hpp"
47
48 #include <vector>
49 #include <algorithm>
50
51 namespace deqp
52 {
53 namespace gles3
54 {
55 namespace Performance
56 {
57 namespace
58 {
59 using namespace glw;
60 using de::MovePtr;
61 using tcu::TestContext;
62 using tcu::TestLog;
63 using tcu::Vec4;
64 using tcu::Vec3;
65 using tcu::Vec2;
66 using glu::RenderContext;
67 using glu::ProgramSources;
68 using glu::ShaderSource;
69 using std::vector;
70 using std::string;
71 using std::map;
72
// One timing measurement. BaseCase::iterate() fills in order and workload after renderSample() returns,
// and BaseCase::logSamples() writes the values to the test log.
struct Sample
{
        deInt64 nullTime;       // Logged as "NullTime" ("Read pixels time", unit "us").
        deInt64 baseTime;       // Logged as "BaseTime" ("Base render time", unit "us").
        deInt64 testTime;       // Logged as "TestTime" ("Test render time", unit "us").
        int             order;          // Position of the sample in the shuffled execution order.
        int             workload;       // Workload value the sample was rendered with.
};
81
// Pair of integers identifying a sample by step and measurement.
// NOTE(review): no user of this struct is visible in this part of the file - presumably (step index,
// measurement index within the step); confirm against callers.
struct SampleParams
{
        int step;
        int measurement;

        // Stores both values as given.
        SampleParams(int step_, int measurement_) : step(step_), measurement(measurement_) {}
};
89
// Flat list of vertex data. RenderData consumes it as 4 floats per vertex.
typedef vector<float> Geometry;

// CPU side description of a drawable object: shader sources plus vertex data.
struct ObjectData
{
        ProgramSources  shader;         // Sources the object's program is built from.
        Geometry                geometry;       // Vertex data, 4 floats per vertex.

        // Copies both arguments into the struct.
        ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {}
};
99
100 class RenderData
101 {
102 public:
103                                                                 RenderData              (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log);
104                                                                 ~RenderData             (void) {};
105
106         const glu::ShaderProgram        m_program;
107         const glu::VertexArray          m_vao;
108         const glu::Buffer                       m_vbo;
109
110         const int                                       m_numVertices;
111 };
112
113 RenderData::RenderData (const ObjectData& object, const  glu::RenderContext& renderCtx, TestLog& log)
114         : m_program             (renderCtx, object.shader)
115         , m_vao                 (renderCtx.getFunctions())
116         , m_vbo                 (renderCtx.getFunctions())
117         , m_numVertices (int(object.geometry.size())/4)
118 {
119         const glw::Functions& gl = renderCtx.getFunctions();
120
121         if (!m_program.isOk())
122                 log << m_program;
123
124         gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo);
125         gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW);
126         gl.bindAttribLocation(m_program.getProgram(), 0, "a_position");
127
128         gl.bindVertexArray(*m_vao);
129         gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
130         gl.enableVertexAttribArray(0);
131         gl.bindVertexArray(0);
132 }
133
134 namespace Utils
135 {
136         vector<float> getFullscreenQuad (float depth)
137         {
138                 const float data[] =
139                 {
140                         +1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own
141                         +1.0f, -1.0f, depth, 1.0f,
142                         -1.0f, -1.0f, depth, 2.0f,
143                         -1.0f, -1.0f, depth, 0.0f,
144                         -1.0f, +1.0f, depth, 1.0f,
145                         +1.0f, +1.0f, depth, 2.0f,
146                 };
147
148                 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
149         }
150
151         vector<float> getFullscreenQuadWithGradient (float depth0, float depth1)
152         {
153                 const float data[] =
154                 {
155                         +1.0f, +1.0f, depth0, 0.0f,
156                         +1.0f, -1.0f, depth0, 1.0f,
157                         -1.0f, -1.0f, depth1, 2.0f,
158                         -1.0f, -1.0f, depth1, 0.0f,
159                         -1.0f, +1.0f, depth1, 1.0f,
160                         +1.0f, +1.0f, depth0, 2.0f,
161                 };
162
163                 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
164         }
165
166         vector<float> getPartScreenQuad (float coverage, float depth)
167         {
168                 const float xMax        = -1.0f + 2.0f*coverage;
169                 const float data[]      =
170                 {
171                          xMax, +1.0f, depth, 0.0f,
172                          xMax, -1.0f, depth, 1.0f,
173                         -1.0f, -1.0f, depth, 2.0f,
174                         -1.0f, -1.0f, depth, 0.0f,
175                         -1.0f, +1.0f, depth, 1.0f,
176                          xMax, +1.0f, depth, 2.0f,
177                 };
178
179                 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
180         }
181
182         // Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise
183         vector<float> getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise)
184         {
185                 const int               gridsize        = resolution+1;
186                 vector<Vec3>    vertices        (gridsize*gridsize);
187                 vector<float>   retval;
188                 de::Random              rng                     (seed);
189
190                 for (int y = 0; y < gridsize; y++)
191                 for (int x = 0; x < gridsize; x++)
192                 {
193                         const bool      isEdge  = x == 0 || y == 0 || x == resolution || y == resolution;
194                         const float x_          = float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
195                         const float y_          = float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
196                         const float z_          = baseDepth + rng.getFloat(-depthNoise, +depthNoise);
197
198                         vertices[y*gridsize + x] = Vec3(x_, y_, z_);
199                 }
200
201                 retval.reserve(resolution*resolution*6);
202
203                 for (int y = 0; y < resolution; y++)
204                 for (int x = 0; x < resolution; x++)
205                 {
206                         const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)];
207                         const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)];
208                         const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)];
209                         const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)];
210
211                         const float temp[6*4] =
212                         {
213                                 p0.x(), p0.y(), p0.z(), 0.0f,
214                                 p2.x(), p2.y(), p2.z(), 1.0f,
215                                 p1.x(), p1.y(), p1.z(), 2.0f,
216
217                                 p3.x(), p3.y(), p3.z(), 0.0f,
218                                 p1.x(), p1.y(), p1.z(), 1.0f,
219                                 p2.x(), p2.y(), p2.z(), 2.0f,
220                         };
221
222                         retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp));
223                 }
224
225                 return retval;
226         }
227
228         // Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader
229         string getBaseVertexShader (void)
230         {
231                 return "#version 300 es\n"
232                                 "in highp vec4 a_position;\n"
233                                 "out mediump vec3 v_bcoords;\n"
234                                 "void main()\n"
235                                 "{\n"
236                                 "       v_bcoords = vec3(0, 0, 0);\n"
237                                 "       v_bcoords[int(a_position.w)] = 1.0;\n"
238                                 "       gl_Position = vec4(a_position.xyz, 1.0);\n"
239                                 "}\n";
240         }
241
242         // Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords
243         string getInstanceNoiseVertexShader (void)
244         {
245                 return "#version 300 es\n"
246                                 "in highp vec4 a_position;\n"
247                                 "out mediump vec3 v_bcoords;\n"
248                                 "void main()\n"
249                                 "{\n"
250                                 "       v_bcoords = vec3(0, 0, 0);\n"
251                                 "       v_bcoords[int(a_position.w)] = 1.0;\n"
252                                 "       vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n"
253                                 "       gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n"
254                                 "}\n";
255         }
256
257         // Renders green triangles with edges highlighted. Exact shade depends on depth.
258         string getDepthAsGreenFragmentShader (void)
259         {
260                 return  "#version 300 es\n"
261                                 "in mediump vec3 v_bcoords;\n"
262                                 "out mediump vec4 fragColor;\n"
263                                 "void main()\n"
264                                 "{\n"
265                                 "       mediump float d = gl_FragCoord.z;\n"
266                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
267                                 "               fragColor = vec4(d,1,d,1);\n"
268                                 "       else\n"
269                                 "               fragColor = vec4(0,d,0,1);\n"
270                                 "}\n";
271         }
272
273         // Renders green triangles with edges highlighted. Exact shade depends on depth.
274         string getDepthAsRedFragmentShader (void)
275         {
276                 return  "#version 300 es\n"
277                                 "in mediump vec3 v_bcoords;\n"
278                                 "out mediump vec4 fragColor;\n"
279                                 "void main()\n"
280                                 "{\n"
281                                 "       mediump float d = gl_FragCoord.z;\n"
282                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
283                                 "               fragColor = vec4(1,d,d,1);\n"
284                                 "       else\n"
285                                 "               fragColor = vec4(d,0,0,1);\n"
286                                 "}\n";
287         }
288
289         // Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
290         string getArithmeticWorkloadFragmentShader (void)
291         {
292
293                 return  "#version 300 es\n"
294                                 "in mediump vec3 v_bcoords;\n"
295                                 "out mediump vec4 fragColor;\n"
296                                 "uniform mediump int u_iterations;\n"
297                                 "void main()\n"
298                                 "{\n"
299                                 "       mediump float d = gl_FragCoord.z;\n"
300                                 "       for (int i = 0; i<u_iterations; i++)\n"
301                                 // cos(a)^2 + sin(a)^2 == 1. since d is in range [0,1] this will lose a few ULP's of precision per iteration but should not significantly change the value of d without extreme iteration counts
302                                 "               d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
303                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
304                                 "               fragColor = vec4(1,d,d,1);\n"
305                                 "       else\n"
306                                 "               fragColor = vec4(d,0,0,1);\n"
307                                 "}\n";
308         }
309
310         // Arithmetic workload shader but contains discard
311         string getArithmeticWorkloadDiscardFragmentShader (void)
312         {
313                 return  "#version 300 es\n"
314                                 "in mediump vec3 v_bcoords;\n"
315                                 "out mediump vec4 fragColor;\n"
316                                 "uniform mediump int u_iterations;\n"
317                                 "void main()\n"
318                                 "{\n"
319                                 "       mediump float d = gl_FragCoord.z;\n"
320                                 "       for (int i = 0; i<u_iterations; i++)\n"
321                                 "               d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
322                                 "       if (d < 0.5) discard;\n"
323                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
324                                 "               fragColor = vec4(1,d,d,1);\n"
325                                 "       else\n"
326                                 "               fragColor = vec4(d,0,0,1);\n"
327                                 "}\n";
328         }
329
330         // Texture fetch based time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
331         string getTextureWorkloadFragmentShader (void)
332         {
333                 return  "#version 300 es\n"
334                                 "in mediump vec3 v_bcoords;\n"
335                                 "out mediump vec4 fragColor;\n"
336                                 "uniform mediump int u_iterations;\n"
337                                 "uniform sampler2D u_texture;\n"
338                                 "void main()\n"
339                                 "{\n"
340                                 "       mediump float d = gl_FragCoord.z;\n"
341                                 "       for (int i = 0; i<u_iterations; i++)\n"
342                                 "               d *= texture(u_texture, (gl_FragCoord.xy+vec2(i))/512.0).r;\n" // Texture is expected to be fully white
343                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
344                                 "               fragColor = vec4(1,1,1,1);\n"
345                                 "       else\n"
346                                 "               fragColor = vec4(d,0,0,1);\n"
347                                 "}\n";
348         }
349
350         // Discard fragments in a grid pattern
351         string getGridDiscardFragmentShader (int gridsize)
352         {
353                 const string            fragSrc = "#version 300 es\n"
354                                                                           "in mediump vec3 v_bcoords;\n"
355                                                                           "out mediump vec4 fragColor;\n"
356                                                                           "void main()\n"
357                                                                           "{\n"
358                                                                           "     mediump float d = gl_FragCoord.z;\n"
359                                                                           "     if ((int(gl_FragCoord.x)/${GRIDRENDER_SIZE} + int(gl_FragCoord.y)/${GRIDRENDER_SIZE})%2 == 0)\n"
360                                                                           "             discard;\n"
361                                                                           "     if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
362                                                                           "             fragColor = vec4(d,1,d,1);\n"
363                                                                           "     else\n"
364                                                                           "             fragColor = vec4(0,d,0,1);\n"
365                                                                           "}\n";
366                 map<string, string>     params;
367
368                 params["GRIDRENDER_SIZE"] = de::toString(gridsize);
369
370                 return tcu::StringTemplate(fragSrc).specialize(params);
371         }
372
373         // A static increment to frag depth
374         string getStaticFragDepthFragmentShader (void)
375         {
376                 return  "#version 300 es\n"
377                                 "in mediump vec3 v_bcoords;\n"
378                                 "out mediump vec4 fragColor;\n"
379                                 "void main()\n"
380                                 "{\n"
381                                 "       mediump float d = gl_FragCoord.z;\n"
382                                 "       gl_FragDepth = gl_FragCoord.z + 0.1;\n"
383                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
384                                 "               fragColor = vec4(d,1,d,1);\n"
385                                 "       else\n"
386                                 "               fragColor = vec4(0,d,0,1);\n"
387                                 "}\n";
388         }
389
390         // A trivial dynamic change to frag depth
391         string getDynamicFragDepthFragmentShader (void)
392         {
393                 return  "#version 300 es\n"
394                                 "in mediump vec3 v_bcoords;\n"
395                                 "out mediump vec4 fragColor;\n"
396                                 "void main()\n"
397                                 "{\n"
398                                 "       mediump float d = gl_FragCoord.z;\n"
399                                 "       gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
400                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
401                                 "               fragColor = vec4(d,1,d,1);\n"
402                                 "       else\n"
403                                 "               fragColor = vec4(0,d,0,1);\n"
404                                 "}\n";
405         }
406
407         // A static increment to frag depth
408         string getStaticFragDepthArithmeticWorkloadFragmentShader (void)
409         {
410                 return  "#version 300 es\n"
411                                 "in mediump vec3 v_bcoords;\n"
412                                 "out mediump vec4 fragColor;\n"
413                                 "uniform mediump int u_iterations;\n"
414                                 "void main()\n"
415                                 "{\n"
416                                 "       mediump float d = gl_FragCoord.z;\n"
417                                 "       gl_FragDepth = gl_FragCoord.z + 0.1;\n"
418                                 "       for (int i = 0; i<u_iterations; i++)\n"
419                                 "               d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
420                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
421                                 "               fragColor = vec4(1,d,d,1);\n"
422                                 "       else\n"
423                                 "               fragColor = vec4(d,0,0,1);\n"
424                                 "}\n";
425         }
426
427         // A trivial dynamic change to frag depth
428         string getDynamicFragDepthArithmeticWorkloadFragmentShader (void)
429         {
430                 return  "#version 300 es\n"
431                                 "in mediump vec3 v_bcoords;\n"
432                                 "out mediump vec4 fragColor;\n"
433                                 "uniform mediump int u_iterations;\n"
434                                 "void main()\n"
435                                 "{\n"
436                                 "       mediump float d = gl_FragCoord.z;\n"
437                                 "       gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
438                                 "       for (int i = 0; i<u_iterations; i++)\n"
439                                 "               d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
440                                 "       if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
441                                 "               fragColor = vec4(1,d,d,1);\n"
442                                 "       else\n"
443                                 "               fragColor = vec4(d,0,0,1);\n"
444                                 "}\n";
445         }
446
447         glu::ProgramSources getBaseShader (void)
448         {
449                 return glu::makeVtxFragSources(getBaseVertexShader(), getDepthAsGreenFragmentShader());
450         }
451
452         glu::ProgramSources getArithmeticWorkloadShader (void)
453         {
454                 return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadFragmentShader());
455         }
456
457         glu::ProgramSources getArithmeticWorkloadDiscardShader (void)
458         {
459                 return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadDiscardFragmentShader());
460         }
461
462         glu::ProgramSources getTextureWorkloadShader (void)
463         {
464                 return glu::makeVtxFragSources(getBaseVertexShader(), getTextureWorkloadFragmentShader());
465         }
466
467         glu::ProgramSources getGridDiscardShader (int gridsize)
468         {
469                 return glu::makeVtxFragSources(getBaseVertexShader(), getGridDiscardFragmentShader(gridsize));
470         }
471
472         inline ObjectData quadWith (const glu::ProgramSources& shader, float depth)
473         {
474                 return ObjectData(shader, getFullscreenQuad(depth));
475         }
476
477         inline ObjectData quadWith (const string& fragShader, float depth)
478         {
479                 return ObjectData(glu::makeVtxFragSources(getBaseVertexShader(), fragShader), getFullscreenQuad(depth));
480         }
481
482         inline ObjectData variableQuad (float depth)
483         {
484                 return ObjectData(glu::makeVtxFragSources(getInstanceNoiseVertexShader(), getDepthAsRedFragmentShader()), getFullscreenQuad(depth));
485         }
486
487         inline ObjectData fastQuad (float depth)
488         {
489                 return ObjectData(getBaseShader(), getFullscreenQuad(depth));
490         }
491
492         inline ObjectData slowQuad (float depth)
493         {
494                 return ObjectData(getArithmeticWorkloadShader(), getFullscreenQuad(depth));
495         }
496
497         inline ObjectData fastQuadWithGradient (float depth0, float depth1)
498         {
499                 return ObjectData(getBaseShader(), getFullscreenQuadWithGradient(depth0, depth1));
500         }
501 } // Utils
502
// Shared base for the test cases in this file. iterate() drives the whole measurement: it sets up an
// offscreen render target, calibrates, builds the occluder and occluded objects, renders a shuffled set
// of samples and logs them. Subclasses supply the geometry, calibration, sampling and analysis.
class BaseCase : public tcu::TestCase
{
public:
        // Width and height, in pixels, of the offscreen render target and of the viewport.
        enum {RENDER_SIZE = 512};

                                                        BaseCase                        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
        virtual                                 ~BaseCase                       (void) {}

        // Runs the complete test in a single call; always returns STOP.
        virtual IterateResult   iterate                         (void);

protected:
        // Writes samples to the log as a sample list with the given name and description.
        void                                    logSamples                      (const vector<Sample>& samples, const string& name, const string& desc);
        // Called by iterate() with the pixels read back after drawing both objects once, plus both programs.
        void                                    logGeometry                     (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg);
        // Called by iterate() after all samples have been logged.
        virtual void                    logAnalysis                     (const vector<Sample>& samples) = 0;
        // Called at the start of iterate(), before any GL setup.
        virtual void                    logDescription          (void) = 0;

        // Produce the two objects that iterate() turns into RenderData; the occluder is drawn first.
        virtual ObjectData              genOccluderGeometry     (void) const = 0;
        virtual ObjectData              genOccludedGeometry     (void) const = 0;

        // Returns the maximum workload; iterate() samples workloads at maxWorkload*step/ITERATION_STEPS.
        virtual int                             calibrate                       (void) const = 0;
        // Renders one sample at the given workload and returns its timings.
        virtual Sample                  renderSample            (const RenderData& occluder, const RenderData& occluded, int workload) const = 0;

        void                                    render                          (const RenderData& data) const;
        void                                    render                          (const RenderData& data, int instances) const;

        const RenderContext&    m_renderCtx;
        tcu::ResultCollector    m_results;      // Its result is applied to the test context at the end of iterate().

        // Number of distinct workload steps, and number of samples rendered for each step.
        enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16};
};
534
535 BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
536         : TestCase              (testCtx, tcu::NODETYPE_PERFORMANCE, name, desc)
537         , m_renderCtx   (renderCtx)
538 {
539 }
540
541 BaseCase::IterateResult BaseCase::iterate (void)
542 {
543         typedef de::MovePtr<RenderData> RenderDataP;
544
545         const glw::Functions&   gl                                      = m_renderCtx.getFunctions();
546         TestLog&                                log                                     = m_testCtx.getLog();
547
548         const glu::Framebuffer  framebuffer                     (gl);
549         const glu::Renderbuffer renderbuffer            (gl);
550         const glu::Renderbuffer depthbuffer                     (gl);
551
552         vector<Sample>                  results;
553         vector<int>                             params;
554         RenderDataP                             occluderData;
555         RenderDataP                             occludedData;
556         tcu::TextureLevel               resultTex                       (tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE);
557         int                                             maxWorkload                     = 0;
558         de::Random                              rng                                     (deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed());
559
560         logDescription();
561
562         gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer);
563         gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE);
564         gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer);
565         gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE);
566
567         gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
568         gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer);
569         gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer);
570         gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE);
571         gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f);
572
573         maxWorkload = calibrate();
574
575         // Setup data
576         occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log));
577         occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log));
578
579         TCU_CHECK(occluderData->m_program.isOk());
580         TCU_CHECK(occludedData->m_program.isOk());
581
582         // Force initialization of GPU resources
583         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
584         gl.enable(GL_DEPTH_TEST);
585
586         render(*occluderData);
587         render(*occludedData);
588         glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess());
589
590         logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program);
591
592         params.reserve(ITERATION_STEPS*ITERATION_SAMPLES);
593
594         // Setup parameters
595         for (int step = 0; step < ITERATION_STEPS; step++)
596         {
597                 const int workload = maxWorkload*step/ITERATION_STEPS;
598
599                 for (int count = 0; count < ITERATION_SAMPLES; count++)
600                         params.push_back(workload);
601         }
602
603         rng.shuffle(params.begin(), params.end());
604
605         // Render samples
606         for (size_t ndx = 0; ndx < params.size(); ndx++)
607         {
608                 const int       workload        = params[ndx];
609                 Sample          sample          = renderSample(*occluderData, *occludedData, workload);
610
611                 sample.workload = workload;
612                 sample.order = int(ndx);
613
614                 results.push_back(sample);
615         }
616
617         logSamples(results, "Samples", "Samples");
618         logAnalysis(results);
619
620         m_results.setTestContextResult(m_testCtx);
621
622         return STOP;
623 }
624
625 void BaseCase::logSamples (const vector<Sample>& samples, const string& name, const string& desc)
626 {
627         TestLog& log = m_testCtx.getLog();
628
629         bool testOnly = true;
630
631         for (size_t ndx = 0; ndx < samples.size(); ndx++)
632         {
633                 if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0)
634                 {
635                         testOnly = false;
636                         break;
637                 }
638         }
639
640         log << TestLog::SampleList(name, desc);
641
642         if (testOnly)
643         {
644                 log << TestLog::SampleInfo
645                         << TestLog::ValueInfo("Workload",       "Workload",                     "",                             QP_SAMPLE_VALUE_TAG_PREDICTOR)
646                         << TestLog::ValueInfo("Order",          "Order of sample",      "",                             QP_SAMPLE_VALUE_TAG_PREDICTOR)
647                         << TestLog::ValueInfo("TestTime",       "Test render time",     "us",                   QP_SAMPLE_VALUE_TAG_RESPONSE)
648                         << TestLog::EndSampleInfo;
649
650                 for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
651                 {
652                         const Sample& sample = samples[sampleNdx];
653
654                         log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample;
655                 }
656         }
657         else
658         {
659                 log << TestLog::SampleInfo
660                         << TestLog::ValueInfo("Workload",       "Workload",                     "",                             QP_SAMPLE_VALUE_TAG_PREDICTOR)
661                         << TestLog::ValueInfo("Order",          "Order of sample",      "",                             QP_SAMPLE_VALUE_TAG_PREDICTOR)
662                         << TestLog::ValueInfo("TestTime",       "Test render time",     "us",                   QP_SAMPLE_VALUE_TAG_RESPONSE)
663                         << TestLog::ValueInfo("NullTime",       "Read pixels time",     "us",                   QP_SAMPLE_VALUE_TAG_RESPONSE)
664                         << TestLog::ValueInfo("BaseTime",       "Base render time",     "us",                   QP_SAMPLE_VALUE_TAG_RESPONSE)
665                         << TestLog::EndSampleInfo;
666
667                 for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
668                 {
669                         const Sample& sample = samples[sampleNdx];
670
671                         log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample;
672                 }
673         }
674
675         log << TestLog::EndSampleList;
676 }
677
678 void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg)
679 {
680         TestLog& log = m_testCtx.getLog();
681
682         log << TestLog::Section("Geometry", "Geometry");
683         log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage;
684         log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage;
685         log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage;
686
687         log << TestLog::Image("Test Geometry", "Test Geometry",  sample);
688         log << TestLog::EndSection;
689
690         log << TestLog::Section("Occluder", "Occluder");
691         log << occluderProg;
692         log << TestLog::EndSection;
693
694         log << TestLog::Section("Occluded", "Occluded");
695         log << occludedProg;
696         log << TestLog::EndSection;
697 }
698
699 void BaseCase::render (const RenderData& data) const
700 {
701         const glw::Functions& gl = m_renderCtx.getFunctions();
702
703         gl.useProgram(data.m_program.getProgram());
704
705         gl.bindVertexArray(*data.m_vao);
706         gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices);
707         gl.bindVertexArray(0);
708 }
709
710 void BaseCase::render (const RenderData& data, int instances) const
711 {
712         const glw::Functions& gl = m_renderCtx.getFunctions();
713
714         gl.useProgram(data.m_program.getProgram());
715
716         gl.bindVertexArray(*data.m_vao);
717         gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances);
718         gl.bindVertexArray(0);
719 }
720
721 // Render occluder once, then repeatedly render occluded geometry. Sample with multiple repetition counts & establish time per call with linear regression
722 class RenderCountCase : public BaseCase
723 {
724 public:
725                                         RenderCountCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
726                                         ~RenderCountCase        (void) {}
727
728 protected:
729         virtual void    logAnalysis                     (const vector<Sample>& samples);
730
731 private:
732         virtual int             calibrate                       (void) const;
733         virtual Sample  renderSample            (const RenderData& occluder, const RenderData& occluded, int callcount) const;
734 };
735
736 RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
737         : BaseCase      (testCtx, renderCtx, name, desc)
738 {
739 }
740
741 void RenderCountCase::logAnalysis (const vector<Sample>& samples)
742 {
743         using namespace gls;
744
745         TestLog&                log                     = m_testCtx.getLog();
746         int                             maxWorkload     = 0;
747         vector<Vec2>    testSamples     (samples.size());
748
749         for (size_t ndx = 0; ndx < samples.size(); ndx++)
750         {
751                 const Sample& sample = samples[ndx];
752
753                 testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
754
755                 maxWorkload = de::max(maxWorkload, sample.workload);
756         }
757
758         {
759                 const float                                                     confidence      = 0.60f;
760                 const LineParametersWithConfidence      testParam       = theilSenSiegelLinearRegression(testSamples, confidence);
761                 const float                                                     usPerCall       = testParam.coefficient;
762                 const float                                                     pxPerCall       = RENDER_SIZE*RENDER_SIZE;
763                 const float                                                     pxPerUs         = pxPerCall/usPerCall;
764                 const float                                                     mpxPerS         = pxPerUs;
765
766                 log << TestLog::Section("Linear Regression", "Linear Regression");
767                 log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
768                 log << TestLog::Message << "Render time for scene with depth test was\n\t"
769                         << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
770                         << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
771                         << "us/workload" << TestLog::EndMessage;
772                 log << TestLog::EndSection;
773
774                 log << TestLog::Section("Result", "Result");
775
776                 if (testParam.coefficientConfidenceLower < 0.0f)
777                 {
778                         log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage;
779                         m_results.addResult(QP_TEST_RESULT_PASS, "Pass");
780                 }
781                 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
782                 {
783                         log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
784                         m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
785                 }
786                 else
787                 {
788                         log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage;
789                         m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2));
790                 }
791
792                 log << TestLog::EndSection;
793         }
794 }
795
796 Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const
797 {
798         const glw::Functions&   gl              = m_renderCtx.getFunctions();
799         Sample                                  sample;
800         deUint64                                now             = 0;
801         deUint64                                prev    = 0;
802         deUint8                                 buffer[4];
803
804         // Stabilize
805         {
806                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
807                 gl.enable(GL_DEPTH_TEST);
808                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
809         }
810
811         prev = deGetMicroseconds();
812
813         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
814         gl.enable(GL_DEPTH_TEST);
815
816         render(occluder);
817         render(occluded, callcount);
818
819         gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
820
821         now = deGetMicroseconds();
822
823         sample.testTime = now - prev;
824         sample.baseTime = 0;
825         sample.nullTime = 0;
826         sample.workload = callcount;
827
828         return sample;
829 }
830
831 int RenderCountCase::calibrate (void) const
832 {
833         using namespace gls;
834
835         const glw::Functions&   gl                                      = m_renderCtx.getFunctions();
836         TestLog&                                log                                     = m_testCtx.getLog();
837
838         const RenderData                occluderGeometry        (genOccluderGeometry(), m_renderCtx, log);
839         const RenderData                occludedGeometry        (genOccludedGeometry(), m_renderCtx, log);
840
841         TheilSenCalibrator              calibrator                      (CalibratorParameters(20, // Initial workload
842                                                                                                                                           10, // Max iteration frames
843                                                                                                                                           20.0f, // Iteration shortcut threshold ms
844                                                                                                                                           20, // Max iterations
845                                                                                                                                           33.0f, // Target frame time
846                                                                                                                                           40.0f, // Frame time cap
847                                                                                                                                           1000.0f // Target measurement duration
848                                                                                                                                           ));
849
850         while (true)
851         {
852                 switch(calibrator.getState())
853                 {
854                         case TheilSenCalibrator::STATE_FINISHED:
855                                 logCalibrationInfo(m_testCtx.getLog(), calibrator);
856                                 return calibrator.getCallCount();
857
858                         case TheilSenCalibrator::STATE_MEASURE:
859                         {
860                                 deUint8 buffer[4];
861                                 deInt64 now;
862                                 deInt64 prev;
863
864                                 prev = deGetMicroseconds();
865
866                                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
867                                 gl.disable(GL_DEPTH_TEST);
868
869                                 render(occluderGeometry);
870                                 render(occludedGeometry, calibrator.getCallCount());
871
872                                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
873
874                                 now = deGetMicroseconds();
875
876                                 calibrator.recordIteration(now - prev);
877                                 break;
878                         }
879
880                         case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
881                                 calibrator.recomputeParameters();
882                                 break;
883                         default:
884                                 DE_ASSERT(false);
885                                 return 1;
886                 }
887         }
888 }
889
890 // Compares time/workload gradients of same geometry with and without depth testing
891 class RelativeChangeCase : public BaseCase
892 {
893 public:
894                                         RelativeChangeCase      (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
895         virtual                 ~RelativeChangeCase     (void) {}
896
897 protected:
898         Sample                  renderSample            (const RenderData& occluder, const RenderData& occluded, int workload) const;
899
900         virtual void    logAnalysis                     (const vector<Sample>& samples);
901
902 private:
903         int                             calibrate                       (void) const;
904 };
905
906 RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
907         : BaseCase              (testCtx, renderCtx, name, desc)
908 {
909 }
910
911 int RelativeChangeCase::calibrate (void) const
912 {
913         using namespace gls;
914
915         const glw::Functions&   gl              = m_renderCtx.getFunctions();
916         TestLog&                                log             = m_testCtx.getLog();
917
918         const RenderData                geom    (genOccludedGeometry(), m_renderCtx, log);
919
920         TheilSenCalibrator calibrator(CalibratorParameters( 20, // Initial workload
921                                                                                                                 10, // Max iteration frames
922                                                                                                                 20.0f, // Iteration shortcut threshold ms
923                                                                                                                 20, // Max iterations
924                                                                                                                 33.0f, // Target frame time
925                                                                                                                 40.0f, // Frame time cap
926                                                                                                                 1000.0f // Target measurement duration
927                                                                                                                 ));
928
929         while (true)
930         {
931                 switch(calibrator.getState())
932                 {
933                         case TheilSenCalibrator::STATE_FINISHED:
934                                 logCalibrationInfo(m_testCtx.getLog(), calibrator);
935                                 return calibrator.getCallCount();
936
937                         case TheilSenCalibrator::STATE_MEASURE:
938                         {
939                                 deUint8                 buffer[4];
940                                 const GLuint    program = geom.m_program.getProgram();
941
942                                 gl.useProgram(program);
943                                 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount());
944
945                                 const deInt64 prev = deGetMicroseconds();
946
947                                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
948                                 gl.disable(GL_DEPTH_TEST);
949
950                                 render(geom);
951
952                                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
953
954                                 const deInt64 now = deGetMicroseconds();
955
956                                 calibrator.recordIteration(now - prev);
957                                 break;
958                         }
959
960                         case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
961                                 calibrator.recomputeParameters();
962                                 break;
963                         default:
964                                 DE_ASSERT(false);
965                                 return 1;
966                 }
967         }
968 }
969
970 Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const
971 {
972         const glw::Functions&   gl              = m_renderCtx.getFunctions();
973         const GLuint                    program = occluded.m_program.getProgram();
974         Sample                                  sample;
975         deUint64                                now             = 0;
976         deUint64                                prev    = 0;
977         deUint8                                 buffer[4];
978
979         gl.useProgram(program);
980         gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
981
982         // Warmup (this workload seems to reduce variation in following workloads)
983         {
984                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
985                 gl.disable(GL_DEPTH_TEST);
986
987                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
988         }
989
990         // Null time
991         {
992                 prev = deGetMicroseconds();
993
994                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
995                 gl.disable(GL_DEPTH_TEST);
996
997                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
998
999                 now = deGetMicroseconds();
1000
1001                 sample.nullTime = now - prev;
1002         }
1003
1004         // Test time
1005         {
1006                 prev = deGetMicroseconds();
1007
1008                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1009                 gl.enable(GL_DEPTH_TEST);
1010
1011                 render(occluder);
1012                 render(occluded);
1013
1014                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1015
1016                 now = deGetMicroseconds();
1017
1018                 sample.testTime = now - prev;
1019         }
1020
1021         // Base time
1022         {
1023                 prev = deGetMicroseconds();
1024
1025                 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1026                 gl.disable(GL_DEPTH_TEST);
1027
1028                 render(occluder);
1029                 render(occluded);
1030
1031                 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1032
1033                 now = deGetMicroseconds();
1034
1035                 sample.baseTime = now - prev;
1036         }
1037
1038         sample.workload = 0;
1039
1040         return sample;
1041 }
1042
1043 void RelativeChangeCase::logAnalysis (const vector<Sample>& samples)
1044 {
1045         using namespace gls;
1046
1047         TestLog&                log                     = m_testCtx.getLog();
1048
1049         int                             maxWorkload     = 0;
1050
1051         vector<Vec2>    nullSamples     (samples.size());
1052         vector<Vec2>    baseSamples     (samples.size());
1053         vector<Vec2>    testSamples     (samples.size());
1054
1055         for (size_t ndx = 0; ndx < samples.size(); ndx++)
1056         {
1057                 const Sample& sample = samples[ndx];
1058
1059                 nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime);
1060                 baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime);
1061                 testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
1062
1063                 maxWorkload = de::max(maxWorkload, sample.workload);
1064         }
1065
1066         {
1067                 const float                                                     confidence      = 0.60f;
1068
1069                 const LineParametersWithConfidence      nullParam       = theilSenSiegelLinearRegression(nullSamples, confidence);
1070                 const LineParametersWithConfidence      baseParam       = theilSenSiegelLinearRegression(baseSamples, confidence);
1071                 const LineParametersWithConfidence      testParam       = theilSenSiegelLinearRegression(testSamples, confidence);
1072
1073                 if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper))
1074                 {
1075                         m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant");
1076                         log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage;
1077                 }
1078
1079                 if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper))
1080                 {
1081                         m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration");
1082                         log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage;
1083                 }
1084
1085                 log << TestLog::Section("Linear Regression", "Linear Regression");
1086                 log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
1087
1088                 log << TestLog::Message << "Render time for empty scene was\n\t"
1089                         << "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset <<  ", " << nullParam.offsetConfidenceUpper << "]us +"
1090                         << "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]"
1091                         << "us/workload" << TestLog::EndMessage;
1092
1093                 log << TestLog::Message << "Render time for scene without depth test was\n\t"
1094                         << "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset <<  ", " << baseParam.offsetConfidenceUpper << "]us +"
1095                         << "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]"
1096                         << "us/workload" << TestLog::EndMessage;
1097
1098                 log << TestLog::Message << "Render time for scene with depth test was\n\t"
1099                         << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
1100                         << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
1101                         << "us/workload" << TestLog::EndMessage;
1102
1103                 log << TestLog::EndSection;
1104
1105                 if (de::inRange(0.0f, testParam.coefficientConfidenceLower, testParam.coefficientConfidenceUpper))
1106                 {
1107                         log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage;
1108                         m_results.addResult(QP_TEST_RESULT_PASS, "0.0");
1109                 }
1110                 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
1111                 {
1112                         log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
1113                         m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1114                 }
1115                 else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25)
1116                 {
1117                         log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage;
1118                         m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1119                 }
1120                 else
1121                 {
1122                         log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage;
1123                         m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2));
1124                 }
1125         }
1126 }
1127
1128 // Speed of trivial culling
1129 class BaseCostCase : public RenderCountCase
1130 {
1131 public:
1132                                                 BaseCostCase            (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1133                                                         : RenderCountCase (testCtx, renderCtx, name, desc) {}
1134
1135                                                 ~BaseCostCase           (void) {}
1136
1137 private:
1138         virtual ObjectData      genOccluderGeometry     (void) const { return Utils::fastQuad(0.2f); }
1139         virtual ObjectData      genOccludedGeometry     (void) const { return Utils::variableQuad(0.8f); }
1140
1141         virtual void            logDescription          (void)
1142         {
1143                 TestLog& log = m_testCtx.getLog();
1144
1145                 log << TestLog::Section("Description", "Test description");
1146                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1147                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1148                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1149                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1150                 log << TestLog::EndSection;
1151         }
1152 };
1153
1154 // Gradient
1155 class GradientCostCase : public RenderCountCase
1156 {
1157 public:
1158                                                 GradientCostCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance)
1159                                                         : RenderCountCase               (testCtx, renderCtx, name, desc)
1160                                                         , m_gradientDistance    (gradientDistance)
1161                                                 {
1162                                                 }
1163
1164                                                 ~GradientCostCase       (void) {}
1165
1166 private:
1167         virtual ObjectData      genOccluderGeometry     (void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); }
1168         virtual ObjectData      genOccludedGeometry     (void) const
1169         {
1170                 return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f));
1171         }
1172
1173         virtual void            logDescription          (void)
1174         {
1175                 TestLog& log = m_testCtx.getLog();
1176
1177                 log << TestLog::Section("Description", "Test description");
1178                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1179                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1180                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1181                 log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." << TestLog::EndMessage;
1182                 log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage;
1183                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1184                 log << TestLog::EndSection;
1185         }
1186
1187         const float                     m_gradientDistance;
1188 };
1189
1190 // Constant offset to frag depth in occluder
1191 class OccluderStaticFragDepthCostCase : public RenderCountCase
1192 {
1193 public:
1194                                                 OccluderStaticFragDepthCostCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1195                                                         : RenderCountCase(testCtx, renderCtx, name, desc)
1196                                                 {
1197                                                 }
1198
1199                                                 ~OccluderStaticFragDepthCostCase        (void) {}
1200
1201 private:
1202         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1203         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::fastQuad(0.8f); }
1204
1205         virtual void            logDescription                                          (void)
1206         {
1207                 TestLog& log = m_testCtx.getLog();
1208
1209                 log << TestLog::Section("Description", "Test description");
1210                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1211                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1212                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1213                 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1214                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1215                 log << TestLog::EndSection;
1216         }
1217 };
1218
1219 // Dynamic offset to frag depth in occluder
1220 class OccluderDynamicFragDepthCostCase : public RenderCountCase
1221 {
1222 public:
1223                                                 OccluderDynamicFragDepthCostCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1224                                                         : RenderCountCase(testCtx, renderCtx, name, desc)
1225                                                 {
1226                                                 }
1227
1228                                                 ~OccluderDynamicFragDepthCostCase       (void) {}
1229
1230 private:
1231         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1232         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::fastQuad(0.8f); }
1233
1234         virtual void            logDescription                                          (void)
1235         {
1236                 TestLog& log = m_testCtx.getLog();
1237
1238                 log << TestLog::Section("Description", "Test description");
1239                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1240                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1241                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1242                 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1243                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1244                 log << TestLog::EndSection;
1245         }
1246 };
1247
1248 // Constant offset to frag depth in occluder
1249 class OccludedStaticFragDepthCostCase : public RenderCountCase
1250 {
1251 public:
1252                                                 OccludedStaticFragDepthCostCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1253                                                         : RenderCountCase(testCtx, renderCtx, name, desc)
1254                                                 {
1255                                                 }
1256
1257                                                 ~OccludedStaticFragDepthCostCase        (void) {}
1258
1259 private:
1260         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::fastQuad(0.2f); }
1261         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1262
1263         virtual void            logDescription                                          (void)
1264         {
1265                 TestLog& log = m_testCtx.getLog();
1266
1267                 log << TestLog::Section("Description", "Test description");
1268                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1269                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1270                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1271                 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1272                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1273                 log << TestLog::EndSection;
1274         }
1275 };
1276
1277 // Dynamic offset to frag depth in occluder
1278 class OccludedDynamicFragDepthCostCase : public RenderCountCase
1279 {
1280 public:
1281                                                 OccludedDynamicFragDepthCostCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1282                                                         : RenderCountCase(testCtx, renderCtx, name, desc)
1283                                                 {
1284                                                 }
1285
1286                                                 ~OccludedDynamicFragDepthCostCase       (void) {}
1287
1288 private:
1289         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::fastQuad(0.2f); }
1290         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1291
1292         virtual void            logDescription                                          (void)
1293         {
1294                 TestLog& log = m_testCtx.getLog();
1295
1296                 log << TestLog::Section("Description", "Test description");
1297                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1298                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1299                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1300                 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1301                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1302                 log << TestLog::EndSection;
1303         }
1304 };
1305
1306 // Culling speed with slightly less trivial geometry
1307 class OccludingGeometryComplexityCostCase : public RenderCountCase
1308 {
1309 public:
1310                                                 OccludingGeometryComplexityCostCase             (TestContext&                   testCtx,
1311                                                                                                                                  const RenderContext&   renderCtx,
1312                                                                                                                                  const char*                    name,
1313                                                                                                                                  const char*                    desc,
1314                                                                                                                                  int                                    resolution,
1315                                                                                                                                  float                                  xyNoise,
1316                                                                                                                                  float                                  zNoise)
1317                                                         : RenderCountCase       (testCtx, renderCtx, name, desc)
1318                                                         , m_resolution          (resolution)
1319                                                         , m_xyNoise                     (xyNoise)
1320                                                         , m_zNoise                      (zNoise)
1321                                                 {
1322                                                 }
1323
1324                                                 ~OccludingGeometryComplexityCostCase    (void) {}
1325
1326 private:
1327         virtual ObjectData      genOccluderGeometry                                             (void) const
1328         {
1329                 return ObjectData(Utils::getBaseShader(),
1330                                                   Utils::getFullScreenGrid(m_resolution,
1331                                                   deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(),
1332                                                   0.2f,
1333                                                   m_zNoise,
1334                                                   m_xyNoise));
1335         }
1336
1337         virtual ObjectData      genOccludedGeometry                                             (void) const { return Utils::variableQuad(0.8f); }
1338
1339         virtual void            logDescription          (void)
1340         {
1341                 TestLog& log = m_testCtx.getLog();
1342
1343                 log << TestLog::Section("Description", "Test description");
1344                 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1345                 log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage;
1346                 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1347                 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1348                 log << TestLog::EndSection;
1349         }
1350
1351         const int                       m_resolution;
1352         const float                     m_xyNoise;
1353         const float                     m_zNoise;
1354 };
1355
1356
1357 // Cases with varying workloads in the fragment shader
1358 class FragmentWorkloadCullCase : public RelativeChangeCase
1359 {
1360 public:
1361                                                 FragmentWorkloadCullCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1362         virtual                         ~FragmentWorkloadCullCase       (void) {}
1363
1364 private:
1365         virtual ObjectData      genOccluderGeometry                     (void) const { return Utils::fastQuad(0.2f); }
1366
1367         virtual void            logDescription                          (void);
1368 };
1369
1370 FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1371         : RelativeChangeCase    (testCtx, renderCtx, name, desc)
1372 {
1373 }
1374
1375 void FragmentWorkloadCullCase::logDescription (void)
1376 {
1377         TestLog& log = m_testCtx.getLog();
1378
1379         log << TestLog::Section("Description", "Test description");
1380         log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1381         log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1382                 "the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1383         log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1384         log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1385         log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1386         log << TestLog::EndSection;
1387 }
1388
1389 // Additional workload consists of texture lookups
1390 class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase
1391 {
1392 public:
1393                                                 FragmentTextureWorkloadCullCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1394         virtual                         ~FragmentTextureWorkloadCullCase        (void) {}
1395
1396         virtual void            init                                                            (void);
1397         virtual void            deinit                                                          (void);
1398
1399 private:
1400         typedef MovePtr<glu::Texture> TexPtr;
1401
1402         virtual ObjectData      genOccludedGeometry                                     (void) const
1403         {
1404                 return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1405         }
1406
1407         TexPtr                          m_texture;
1408 };
1409
1410 FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1411         : FragmentWorkloadCullCase      (testCtx, renderCtx, name, desc)
1412 {
1413 }
1414
1415 void FragmentTextureWorkloadCullCase::init (void)
1416 {
1417         const glw::Functions&   gl              = m_renderCtx.getFunctions();
1418         const int                               size    = 128;
1419         const vector<deUint8>   data    (size*size*4, 255);
1420
1421         m_texture = MovePtr<glu::Texture>(new glu::Texture(gl));
1422
1423         gl.bindTexture(GL_TEXTURE_2D, m_texture);
1424         gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
1425         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1426         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1427 }
1428
1429 void FragmentTextureWorkloadCullCase::deinit (void)
1430 {
1431         m_texture.clear();
1432 }
1433
1434 // Additional workload consists of arithmetic
1435 class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1436 {
1437 public:
1438                                                 FragmentArithmeticWorkloadCullCase      (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1439                                                 : FragmentWorkloadCullCase      (testCtx, renderCtx, name, desc)
1440                                         {
1441                                         }
1442         virtual                         ~FragmentArithmeticWorkloadCullCase     (void) {}
1443
1444 private:
1445         virtual ObjectData      genOccludedGeometry                                     (void) const
1446         {
1447                 return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1448         }
1449 };
1450
1451 // Contains dynamicly unused discard after a series of calculations
1452 class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1453 {
1454 public:
1455                                                 FragmentDiscardArithmeticWorkloadCullCase       (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1456                                                 : FragmentWorkloadCullCase      (testCtx, renderCtx, name, desc)
1457                                         {
1458                                         }
1459
1460         virtual                         ~FragmentDiscardArithmeticWorkloadCullCase      (void) {}
1461
1462 private:
1463         virtual ObjectData      genOccludedGeometry                                                     (void) const
1464         {
1465                 return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f));
1466         }
1467
1468         virtual void            logDescription                                                          (void)
1469         {
1470                 TestLog& log = m_testCtx.getLog();
1471
1472                 log << TestLog::Section("Description", "Test description");
1473                 log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1474                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1475                         "the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage;
1476                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1477                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1478                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1479                 log << TestLog::EndSection;
1480         }
1481 };
1482
1483 // Discards fragments from the occluder in a grid pattern
1484 class PartialOccluderDiscardCullCase : public RelativeChangeCase
1485 {
1486 public:
1487                                                 PartialOccluderDiscardCullCase  (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize)
1488                                                         : RelativeChangeCase            (testCtx, renderCtx, name, desc)
1489                                                         , m_gridsize    (gridsize)
1490                                                 {
1491                                                 }
1492         virtual                         ~PartialOccluderDiscardCullCase (void) {}
1493
1494 private:
1495         virtual ObjectData      genOccluderGeometry                             (void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); }
1496         virtual ObjectData      genOccludedGeometry                             (void) const { return Utils::slowQuad(0.8f); }
1497
1498         virtual void            logDescription                                  (void)
1499         {
1500                 TestLog& log = m_testCtx.getLog();
1501
1502                 log << TestLog::Section("Description", "Test description");
1503                 log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage;
1504                 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad discards half the "
1505                         "fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage;
1506                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1507                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1508                 log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time"  << TestLog::EndMessage;
1509                 log << TestLog::EndSection;
1510         }
1511
1512         const int                       m_gridsize;
1513 };
1514
1515 // Trivial occluder covering part of screen
1516 class PartialOccluderCullCase : public RelativeChangeCase
1517 {
1518 public:
1519                                                 PartialOccluderCullCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage)
1520                                                         : RelativeChangeCase            (testCtx, renderCtx, name, desc)
1521                                                         , m_coverage    (coverage)
1522                                                 {
1523                                                 }
1524                                                 ~PartialOccluderCullCase        (void) {}
1525
1526 private:
1527         virtual ObjectData      genOccluderGeometry                     (void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); }
1528         virtual ObjectData      genOccludedGeometry                     (void) const {return Utils::slowQuad(0.8f); }
1529
1530         virtual void            logDescription                          (void)
1531         {
1532                 TestLog& log = m_testCtx.getLog();
1533
1534                 log << TestLog::Section("Description", "Test description");
1535                 log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage;
1536                 log << TestLog::Message << "Geometry consists of two quads. The first (occluding) quad covers " << m_coverage*100.0f
1537                         << "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage;
1538                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1539                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1540                 log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area"  << TestLog::EndMessage;
1541                 log << TestLog::EndSection;
1542         }
1543
1544         const float                     m_coverage;
1545 };
1546
1547 // Constant offset to frag depth in occluder
1548 class StaticOccluderFragDepthCullCase : public RelativeChangeCase
1549 {
1550 public:
1551                                                 StaticOccluderFragDepthCullCase         (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1552                                                         : RelativeChangeCase(testCtx, renderCtx, name, desc)
1553                                                 {
1554                                                 }
1555
1556                                                 ~StaticOccluderFragDepthCullCase        (void) {}
1557
1558 private:
1559         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1560         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::slowQuad(0.8f); }
1561
1562         virtual void            logDescription                                          (void)
1563         {
1564                 TestLog& log = m_testCtx.getLog();
1565
1566                 log << TestLog::Section("Description", "Test description");
1567                 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1568                 log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1569                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1570                 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1571                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1572                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1573                 log << TestLog::EndSection;
1574         }
1575 };
1576
1577 // Dynamic offset to frag depth in occluder
1578 class DynamicOccluderFragDepthCullCase : public RelativeChangeCase
1579 {
1580 public:
1581                                                 DynamicOccluderFragDepthCullCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1582                                                         : RelativeChangeCase(testCtx, renderCtx, name, desc)
1583                                                 {
1584                                                 }
1585
1586                                                 ~DynamicOccluderFragDepthCullCase       (void) {}
1587
1588 private:
1589         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1590         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::slowQuad(0.8f); }
1591
1592         virtual void            logDescription                                          (void)
1593         {
1594                 TestLog& log = m_testCtx.getLog();
1595
1596                 log << TestLog::Section("Description", "Test description");
1597                 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1598                 log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1599                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1600                 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1601                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1602                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1603                 log << TestLog::EndSection;
1604         }
1605 };
1606
1607 // Constant offset to frag depth in occluded
1608 class StaticOccludedFragDepthCullCase : public RelativeChangeCase
1609 {
1610 public:
1611                                                 StaticOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1612                                                         : RelativeChangeCase(testCtx, renderCtx, name, desc)
1613                                                 {
1614                                                 }
1615
1616                                                 ~StaticOccludedFragDepthCullCase        (void) {}
1617
1618 private:
1619         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::fastQuad(0.2f); }
1620         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1621
1622         virtual void            logDescription                                          (void)
1623         {
1624                 TestLog& log = m_testCtx.getLog();
1625
1626                 log << TestLog::Section("Description", "Test description");
1627                 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1628                 log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1629                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1630                 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1631                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1632                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1633                 log << TestLog::EndSection;
1634         }
1635 };
1636
1637 // Dynamic offset to frag depth in occluded
1638 class DynamicOccludedFragDepthCullCase : public RelativeChangeCase
1639 {
1640 public:
1641                                                 DynamicOccludedFragDepthCullCase        (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1642                                                         : RelativeChangeCase(testCtx, renderCtx, name, desc)
1643                                                 {
1644                                                 }
1645
1646                                                 ~DynamicOccludedFragDepthCullCase       (void) {}
1647
1648 private:
1649         virtual ObjectData      genOccluderGeometry                                     (void) const { return Utils::fastQuad(0.2f); }
1650         virtual ObjectData      genOccludedGeometry                                     (void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1651
1652         virtual void            logDescription                                          (void)
1653         {
1654                 TestLog& log = m_testCtx.getLog();
1655
1656                 log << TestLog::Section("Description", "Test description");
1657                 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1658                 log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1659                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1660                 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1661                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1662                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1663                 log << TestLog::EndSection;
1664         }
1665 };
1666
// Occluder and occluded geometry are rendered in reversed (back first) order
1668 class ReversedDepthOrderCullCase : public RelativeChangeCase
1669 {
1670 public:
1671                                                 ReversedDepthOrderCullCase      (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1672                                                         : RelativeChangeCase(testCtx, renderCtx, name, desc)
1673                                                 {
1674                                                 }
1675
1676                                                 ~ReversedDepthOrderCullCase     (void) {}
1677
1678 private:
1679         virtual ObjectData      genOccluderGeometry                     (void) const { return Utils::fastQuad(0.2f); }
1680         virtual ObjectData      genOccludedGeometry                     (void) const { return Utils::slowQuad(0.8f); }
1681
1682         virtual void            logDescription                          (void)
1683         {
1684                 TestLog& log = m_testCtx.getLog();
1685
1686                 log << TestLog::Section("Description", "Test description");
1687                 log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage;
1688                 log << TestLog::Message << "Geometry consists of two fullscreen quads. The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1689                 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1690                 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1691                 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1692                 log << TestLog::EndSection;
1693         }
1694
1695         // Rendering order of occluder & occluded is reversed, otherwise identical to parent version
1696         Sample                          renderSample                            (const RenderData& occluder, const RenderData& occluded, int workload) const
1697         {
1698                 const glw::Functions&   gl              = m_renderCtx.getFunctions();
1699                 const GLuint                    program = occluded.m_program.getProgram();
1700                 Sample                                  sample;
1701                 deUint64                                now             = 0;
1702                 deUint64                                prev    = 0;
1703                 deUint8                                 buffer[4];
1704
1705                 gl.useProgram(program);
1706                 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
1707
1708                 // Warmup (this workload seems to reduce variation in following workloads)
1709                 {
1710                         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1711                         gl.disable(GL_DEPTH_TEST);
1712
1713                         gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1714                 }
1715
1716                 // Null time
1717                 {
1718                         prev = deGetMicroseconds();
1719
1720                         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1721                         gl.disable(GL_DEPTH_TEST);
1722
1723                         gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1724
1725                         now = deGetMicroseconds();
1726
1727                         sample.nullTime = now - prev;
1728                 }
1729
1730                 // Test time
1731                 {
1732                         prev = deGetMicroseconds();
1733
1734                         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1735                         gl.enable(GL_DEPTH_TEST);
1736
1737                         render(occluded);
1738                         render(occluder);
1739
1740                         gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1741
1742                         now = deGetMicroseconds();
1743
1744                         sample.testTime = now - prev;
1745                 }
1746
1747                 // Base time
1748                 {
1749                         prev = deGetMicroseconds();
1750
1751                         gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1752                         gl.disable(GL_DEPTH_TEST);
1753
1754                         render(occluded);
1755                         render(occluder);
1756
1757                         gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1758
1759                         now = deGetMicroseconds();
1760
1761                         sample.baseTime = now - prev;
1762                 }
1763
1764                 sample.workload = 0;
1765
1766                 return sample;
1767         }
1768 };
1769
1770 } // Anonymous
1771
1772 DepthTests::DepthTests (Context& context)
1773         : TestCaseGroup (context, "depth", "Depth culling performance")
1774 {
1775 }
1776
1777 void DepthTests::init (void)
1778 {
1779         TestContext&                    testCtx         = m_context.getTestContext();
1780         const RenderContext&    renderCtx       = m_context.getRenderContext();
1781
1782         {
1783                 tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency");
1784
1785                 addChild(cullEfficiencyGroup);
1786
1787                 {
1788                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload");
1789
1790                         cullEfficiencyGroup->addChild(group);
1791
1792                         group->addChild(new FragmentTextureWorkloadCullCase(                    testCtx, renderCtx, "workload_texture",                         "Fragment shader with texture lookup workload"));
1793                         group->addChild(new FragmentArithmeticWorkloadCullCase(                 testCtx, renderCtx, "workload_arithmetic",                      "Fragment shader with arithmetic workload"));
1794                         group->addChild(new FragmentDiscardArithmeticWorkloadCullCase(  testCtx, renderCtx, "workload_arithmetic_discard",      "Fragment shader that may discard with arithmetic workload"));
1795                 }
1796
1797                 {
1798                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard");
1799
1800                         cullEfficiencyGroup->addChild(group);
1801
1802                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256",      "Parts of occluder geometry discarded", 256));
1803                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128",      "Parts of occluder geometry discarded", 128));
1804                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64",       "Parts of occluder geometry discarded", 64));
1805                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32",       "Parts of occluder geometry discarded", 32));
1806                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16",       "Parts of occluder geometry discarded", 16));
1807                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8",        "Parts of occluder geometry discarded", 8));
1808                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4",        "Parts of occluder geometry discarded", 4));
1809                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2",        "Parts of occluder geometry discarded", 2));
1810                         group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1",        "Parts of occluder geometry discarded", 1));
1811                 }
1812
1813                 {
1814                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage");
1815
1816                         cullEfficiencyGroup->addChild(group);
1817
1818                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f));
1819                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f));
1820                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f));
1821                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f));
1822                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering only part of occluded geometry", 0.80f));
1823                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f));
1824                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f));
1825                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f));
1826                         group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f));
1827                 }
1828
1829                 {
1830                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage");
1831
1832                         cullEfficiencyGroup->addChild(group);
1833
1834                         group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", ""));
1835                         group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", ""));
1836                         group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", ""));
1837                         group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", ""));
1838                 }
1839
1840                 {
1841                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order");
1842
1843                         cullEfficiencyGroup->addChild(group);
1844
1845                         group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order"));
1846                 }
1847         }
1848
1849         {
1850                 tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency");
1851
1852                 addChild(testCostGroup);
1853
1854                 {
1855                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences");
1856
1857                         testCostGroup->addChild(group);
1858
1859                         group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", ""));
1860                         group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_050", "", 0.50f));
1861                         group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f));
1862                         group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f));
1863                         group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f));
1864                         group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f));
1865                 }
1866
1867                 {
1868                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity");
1869
1870                         testCostGroup->addChild(group);
1871
1872                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5",   "", 5,   0.0f, 0.0f));
1873                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15",  "", 15,  0.0f, 0.0f));
1874                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25",  "", 25,  0.0f, 0.0f));
1875                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50",  "", 50,  0.0f, 0.0f));
1876                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f));
1877
1878                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.0f));
1879                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.0f));
1880                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.0f));
1881                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.0f));
1882                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f));
1883
1884                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_5",   "", 5,   0.0f, 0.2f));
1885                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15",  "", 15,  0.0f, 0.2f));
1886                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25",  "", 25,  0.0f, 0.2f));
1887                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50",  "", 50,  0.0f, 0.2f));
1888                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f));
1889
1890                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.2f));
1891                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.2f));
1892                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.2f));
1893                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.2f));
1894                         group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f));
1895                 }
1896
1897                 {
1898                         tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth");
1899
1900                         testCostGroup->addChild(group);
1901
1902                         group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", ""));
1903                         group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", ""));
1904                         group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", ""));
1905                         group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", ""));
1906                 }
1907         }
1908 }
1909
1910 } // Performance
1911 } // gles3
1912 } // deqp