Merge changes from topic 'am-afbe65226c664d438460b33cb5a405a8' into nyc-mr1-dev am...
[platform/upstream/VK-GL-CTS.git] / modules / gles3 / performance / es3pBufferDataUploadTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Buffer data upload performance tests.
22  *//*--------------------------------------------------------------------*/
23
24 #include "es3pBufferDataUploadTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuVectorUtil.hpp"
28 #include "tcuSurface.hpp"
29 #include "tcuCPUWarmup.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "gluRenderContext.hpp"
32 #include "gluShaderProgram.hpp"
33 #include "gluStrUtil.hpp"
34 #include "gluPixelTransfer.hpp"
35 #include "gluObjectWrapper.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 #include "deClock.h"
39 #include "deMath.h"
40 #include "deStringUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deMemory.h"
43 #include "deThread.h"
44 #include "deMeta.hpp"
45
46 #include <algorithm>
47 #include <iomanip>
48 #include <limits>
49
50 namespace deqp
51 {
52 namespace gles3
53 {
54 namespace Performance
55 {
56 namespace
57 {
58
59 using gls::theilSenSiegelLinearRegression;
60 using gls::LineParametersWithConfidence;
61 using de::meta::EnableIf;
62 using de::meta::Not;
63
// GLSL ES 3.00 vertex shader source: forwards the a_position attribute to gl_Position unchanged.
64 static const char* const s_dummyVertexShader =          "#version 300 es\n"
65                                                                                                         "in highp vec4 a_position;\n"
66                                                                                                         "void main (void)\n"
67                                                                                                         "{\n"
68                                                                                                         "       gl_Position = a_position;\n"
69                                                                                                         "}\n";
70
// GLSL ES 3.00 fragment shader source: writes constant opaque red (1, 0, 0, 1) to color output 0.
// NOTE(review): the identifier is spelled "Fragnent"; left unchanged because code outside this
// chunk may refer to it by this name.
71 static const char* const s_dummyFragnentShader =        "#version 300 es\n"
72                                                                                                         "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
73                                                                                                         "void main (void)\n"
74                                                                                                         "{\n"
75                                                                                                         "       dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
76                                                                                                         "}\n";
77
// GLSL ES 3.00 vertex shader source: forwards a_position to gl_Position and passes the
// a_color attribute to the fragment stage through the v_color varying.
78 static const char* const s_colorVertexShader =          "#version 300 es\n"
79                                                                                                         "in highp vec4 a_position;\n"
80                                                                                                         "in highp vec4 a_color;\n"
81                                                                                                         "out highp vec4 v_color;\n"
82                                                                                                         "void main (void)\n"
83                                                                                                         "{\n"
84                                                                                                         "       gl_Position = a_position;\n"
85                                                                                                         "       v_color = a_color;\n"
86                                                                                                         "}\n";
87
// GLSL ES 3.00 fragment shader source: writes the interpolated v_color varying to color output 0.
88 static const char* const s_colorFragmentShader =        "#version 300 es\n"
89                                                                                                         "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
90                                                                                                         "in mediump vec4 v_color;\n"
91                                                                                                         "void main (void)\n"
92                                                                                                         "{\n"
93                                                                                                         "       dEQP_FragColor = v_color;\n"
94                                                                                                         "}\n";
95
// Timing sample for an upload that is measured as one operation.
96 struct SingleOperationDuration
97 {
98         deUint64 totalDuration; // presumably microseconds, like fitResponseDuration -- TODO confirm against the sampling code
99         deUint64 fitResponseDuration; // used for fitting; calculateBasicStatistics() treats this value as microseconds
100 };
101
// Timing sample for a map-buffer-range upload that has an allocation phase.
// The code that fills the fields is outside this chunk; the per-phase meanings
// below are read from the field names and should be confirmed there.
102 struct MapBufferRangeDuration
103 {
104         deUint64 mapDuration; // map phase
105         deUint64 unmapDuration; // unmap phase
106         deUint64 writeDuration; // write into the mapped range
107         deUint64 allocDuration; // allocation phase
108         deUint64 totalDuration; // whole operation
109
110         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
111 };
112
// Timing sample for a map-buffer-range upload without an allocation phase:
// same fields as MapBufferRangeDuration except that allocDuration is absent.
113 struct MapBufferRangeDurationNoAlloc
114 {
115         deUint64 mapDuration; // map phase (meaning read from the name)
116         deUint64 unmapDuration; // unmap phase
117         deUint64 writeDuration; // write into the mapped range
118         deUint64 totalDuration; // whole operation
119
120         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
121 };
122
// Timing sample for a map-buffer-range upload with an explicit flush phase and
// an allocation phase. Per-phase meanings are read from the field names; the
// code that fills them is outside this chunk.
123 struct MapBufferRangeFlushDuration
124 {
125         deUint64 mapDuration; // map phase
126         deUint64 unmapDuration; // unmap phase
127         deUint64 writeDuration; // write into the mapped range
128         deUint64 flushDuration; // flush phase
129         deUint64 allocDuration; // allocation phase
130         deUint64 totalDuration; // whole operation
131
132         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
133 };
134
// Timing sample for a map-buffer-range upload with an explicit flush phase but
// no allocation phase: MapBufferRangeFlushDuration without allocDuration.
135 struct MapBufferRangeFlushDurationNoAlloc
136 {
137         deUint64 mapDuration; // map phase (meaning read from the name)
138         deUint64 unmapDuration; // unmap phase
139         deUint64 writeDuration; // write into the mapped range
140         deUint64 flushDuration; // flush phase
141         deUint64 totalDuration; // whole operation
142
143         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
144 };
145
// Timing sample for a render followed by a read-back. The code that fills the
// fields is outside this chunk; meanings are read from the field names.
146 struct RenderReadDuration
147 {
148         deUint64 renderDuration; // render phase
149         deUint64 readDuration; // read-back phase
150         deUint64 renderReadDuration; // presumably render + read combined -- TODO confirm
151         deUint64 totalDuration; // whole operation
152
153         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
154 };
155
// Timing sample with the same fields as RenderReadDuration, kept as a distinct
// type so that it can have its own SampleTypeTraits specialization. Judging by
// the name it is used when an unrelated upload accompanies the render -- TODO confirm.
156 struct UnrelatedUploadRenderReadDuration
157 {
158         deUint64 renderDuration; // render phase (meaning read from the name)
159         deUint64 readDuration; // read-back phase
160         deUint64 renderReadDuration; // presumably render + read combined -- TODO confirm
161         deUint64 totalDuration; // whole operation
162
163         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
164 };
165
// Timing sample for an upload followed by a render and a read-back. The code
// that fills the fields is outside this chunk; meanings are read from the names.
166 struct UploadRenderReadDuration
167 {
168         deUint64 uploadDuration; // upload phase
169         deUint64 renderDuration; // render phase
170         deUint64 readDuration; // read-back phase
171         deUint64 totalDuration; // whole operation
172         deUint64 renderReadDuration; // presumably render + read combined -- TODO confirm
173
174         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
175 };
176
// Timing sample with the same fields as UploadRenderReadDuration, kept as a
// distinct type so that its SampleTypeTraits specialization can set
// LOG_UNRELATED_UPLOAD_SIZE to 1.
177 struct UploadRenderReadDurationWithUnrelatedUploadSize
178 {
179         deUint64 uploadDuration; // upload phase (meaning read from the name)
180         deUint64 renderDuration; // render phase
181         deUint64 readDuration; // read-back phase
182         deUint64 totalDuration; // whole operation
183         deUint64 renderReadDuration; // presumably render + read combined -- TODO confirm
184
185         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
186 };
187
// Timing sample for a render, an upload, a second render and a read-back. The
// code that fills the fields is outside this chunk; meanings are read from the names.
188 struct RenderUploadRenderReadDuration
189 {
190         deUint64 firstRenderDuration; // render before the upload
191         deUint64 uploadDuration; // upload phase
192         deUint64 secondRenderDuration; // render after the upload
193         deUint64 readDuration; // read-back phase
194         deUint64 totalDuration; // whole operation
195         deUint64 renderReadDuration; // presumably render + read combined -- TODO confirm
196
197         deUint64 fitResponseDuration; // response value of the linear fit; treated as microseconds by calculateBasicStatistics()
198 };
199
// One upload measurement: the sizes involved plus the measured durations.
// SampleT is one of the *Duration structs; it must provide fitResponseDuration
// for calculateBasicStatistics().
200 template <typename SampleT>
201 struct UploadSampleResult
202 {
203         typedef SampleT SampleType; // lets generic code name the duration type (see calculateSingleOperationStatistics())
204
205         int                     bufferSize; // sizes are presumably in bytes -- TODO confirm against the sampling code
206         int                     allocatedSize;
207         int                     writtenSize; // predictor passed to the fit statistics by calculateBasicTransferStatistics()
208         SampleType      duration; // measured durations for this sample
209 };
210
// One render measurement: the data sizes involved plus the measured durations.
// SampleT is one of the *Duration structs; it must provide fitResponseDuration
// for calculateBasicStatistics().
211 template <typename SampleT>
212 struct RenderSampleResult
213 {
214         typedef SampleT SampleType; // lets generic code name the duration type (see calculateSingleOperationStatistics())
215
216         int                     uploadedDataSize; // sizes are presumably in bytes -- TODO confirm against the sampling code
217         int                     renderDataSize; // predictor passed to the fit statistics by calculateBasicRenderStatistics()
218         int                     unrelatedDataSize;
219         int                     numVertices;
220         SampleT         duration; // measured durations for this sample
221 };
222
// Distribution summary of one duration series, as produced by
// calculateSingleOperationStatistics() and calculateBasicStatistics(). The
// durations are converted to float; the quantiles come from linearSample() on
// the sorted values.
223 struct SingleOperationStatistics
224 {
225         float minTime; // smallest value
226         float maxTime; // largest value
227         float medianTime; // sampled at position 0.5 of the sorted values
228         float min2DecileTime;           // !< minimum value in the 2nd decile (sampled at position 0.1)
229         float max9DecileTime;           // !< maximum value in the 9th decile (sampled at position 0.9)
230 };
231
// Statistics for upload samples measured as a single operation. All members are
// written by calculateBasicStatistics(); "fitted" below means the value
// predicted by the linear fit (predictor * coefficient + offset).
232 struct SingleCallStatistics
233 {
234         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
235
236         float                                           medianRate; // median of (predictor / response time in seconds)
237         float                                           maxDiffTime; // largest (actual - fitted) difference
238         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
239         float                                           medianDiffTime; // median (actual - fitted) difference
240
241         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
242         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
243         float                                           medianRelDiffTime; // median relative difference
244 };
245
// Statistics for map-buffer-range upload samples. The per-phase members are
// filled by calculateSingleOperationStatistics(); the remaining members are
// written by calculateBasicStatistics(), where "fitted" means the value
// predicted by the linear fit.
246 struct MapCallStatistics
247 {
248         SingleOperationStatistics       map; // from mapDuration
249         SingleOperationStatistics       unmap; // from unmapDuration
250         SingleOperationStatistics       write; // from writeDuration
251         SingleOperationStatistics       alloc; // from allocDuration
252         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
253
254         float                                           medianRate; // median of (predictor / response time in seconds)
255         float                                           maxDiffTime; // largest (actual - fitted) difference
256         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
257         float                                           medianDiffTime; // median (actual - fitted) difference
258
259         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
260         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
261         float                                           medianRelDiffTime; // median relative difference
262 };
263
// Statistics for map-buffer-range upload samples that have a flush phase:
// MapCallStatistics plus a flush member. The members after the per-phase block
// are the ones calculateBasicStatistics() writes.
264 struct MapFlushCallStatistics
265 {
266         SingleOperationStatistics       map; // per-phase distributions, one per *Duration field of the sample
267         SingleOperationStatistics       unmap;
268         SingleOperationStatistics       write;
269         SingleOperationStatistics       flush;
270         SingleOperationStatistics       alloc;
271         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
272
273         float                                           medianRate; // median of (predictor / response time in seconds)
274         float                                           maxDiffTime; // largest (actual - fitted) difference
275         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
276         float                                           medianDiffTime; // median (actual - fitted) difference
277
278         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
279         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
280         float                                           medianRelDiffTime; // median relative difference
281 };
282
// Statistics for render + read-back samples. The members from medianRate
// onwards, and result, are the ones calculateBasicStatistics() writes; the code
// filling render/read/total is outside this chunk.
283 struct RenderReadStatistics
284 {
285         SingleOperationStatistics       render; // presumably from renderDuration -- TODO confirm
286         SingleOperationStatistics       read; // presumably from readDuration -- TODO confirm
287         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
288         SingleOperationStatistics       total; // presumably from totalDuration -- TODO confirm
289
290         float                                           medianRate; // median of (predictor / response time in seconds)
291         float                                           maxDiffTime; // largest (actual - fitted) difference
292         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
293         float                                           medianDiffTime; // median (actual - fitted) difference
294
295         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
296         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
297         float                                           medianRelDiffTime; // median relative difference
298 };
299
// Statistics for upload + render + read-back samples. The members from
// medianRate onwards, and result, are the ones calculateBasicStatistics()
// writes; the code filling the per-phase members is outside this chunk.
300 struct UploadRenderReadStatistics
301 {
302         SingleOperationStatistics       upload; // presumably from uploadDuration -- TODO confirm
303         SingleOperationStatistics       render; // presumably from renderDuration -- TODO confirm
304         SingleOperationStatistics       read; // presumably from readDuration -- TODO confirm
305         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
306         SingleOperationStatistics       total; // presumably from totalDuration -- TODO confirm
307
308         float                                           medianRate; // median of (predictor / response time in seconds)
309         float                                           maxDiffTime; // largest (actual - fitted) difference
310         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
311         float                                           medianDiffTime; // median (actual - fitted) difference
312
313         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
314         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
315         float                                           medianRelDiffTime; // median relative difference
316 };
317
// Statistics for render + upload + render + read-back samples. The members from
// medianRate onwards, and result, are the ones calculateBasicStatistics()
// writes; the code filling the per-phase members is outside this chunk.
318 struct RenderUploadRenderReadStatistics
319 {
320         SingleOperationStatistics       firstRender; // presumably from firstRenderDuration -- TODO confirm
321         SingleOperationStatistics       upload; // presumably from uploadDuration -- TODO confirm
322         SingleOperationStatistics       secondRender; // presumably from secondRenderDuration -- TODO confirm
323         SingleOperationStatistics       read; // presumably from readDuration -- TODO confirm
324         SingleOperationStatistics       result; // distribution of the fitResponseDuration values
325         SingleOperationStatistics       total; // presumably from totalDuration -- TODO confirm
326
327         float                                           medianRate; // median of (predictor / response time in seconds)
328         float                                           maxDiffTime; // largest (actual - fitted) difference
329         float                                           maxDiff9DecileTime; // (actual - fitted) sampled at position 0.9 of the sorted differences
330         float                                           medianDiffTime; // median (actual - fitted) difference
331
332         float                                           maxRelDiffTime; // largest (actual - fitted) / fitted; counted as 0 where fitted < 1 microsecond
333         float                                           max9DecileRelDiffTime; // relative difference sampled at position 0.9
334         float                                           medianRelDiffTime; // median relative difference
335 };
336
// Compile-time description of a duration sample type. The primary template is
// intentionally empty: each supported sample type has an explicit
// specialization that names its statistics struct (StatsType) and declares
// HAS_* / LOG_* flags as enum constants.
337 template <typename T>
338 struct SampleTypeTraits
339 {
340 };
341
// Traits for SingleOperationDuration: it has no per-phase duration fields, so
// every HAS_* flag is 0.
342 template <>
343 struct SampleTypeTraits<SingleOperationDuration>
344 {
345         typedef SingleCallStatistics StatsType; // statistics struct used for this sample type
346
347         enum { HAS_MAP_STATS            = 0     };
348         enum { HAS_UNMAP_STATS          = 0     };
349         enum { HAS_WRITE_STATS          = 0     };
350         enum { HAS_FLUSH_STATS          = 0     };
351         enum { HAS_ALLOC_STATS          = 0     };
352         enum { LOG_CONTRIBUTIONS        = 0     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
353 };
354
// Traits for MapBufferRangeDuration: the HAS_* flags set to 1 correspond to the
// map/unmap/write/alloc duration fields the sample type has; it has no flush field.
355 template <>
356 struct SampleTypeTraits<MapBufferRangeDuration>
357 {
358         typedef MapCallStatistics StatsType; // statistics struct used for this sample type
359
360         enum { HAS_MAP_STATS            = 1     };
361         enum { HAS_UNMAP_STATS          = 1     };
362         enum { HAS_WRITE_STATS          = 1     };
363         enum { HAS_FLUSH_STATS          = 0     };
364         enum { HAS_ALLOC_STATS          = 1     };
365         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
366 };
367
// Traits for MapBufferRangeDurationNoAlloc: same as MapBufferRangeDuration
// except HAS_ALLOC_STATS is 0, matching the missing allocDuration field.
// MapCallStatistics is still the statistics type.
368 template <>
369 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
370 {
371         typedef MapCallStatistics StatsType; // statistics struct used for this sample type
372
373         enum { HAS_MAP_STATS            = 1     };
374         enum { HAS_UNMAP_STATS          = 1     };
375         enum { HAS_WRITE_STATS          = 1     };
376         enum { HAS_FLUSH_STATS          = 0     };
377         enum { HAS_ALLOC_STATS          = 0     };
378         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
379 };
380
// Traits for MapBufferRangeFlushDuration: every HAS_* flag is 1, matching its
// map/unmap/write/flush/alloc duration fields.
381 template <>
382 struct SampleTypeTraits<MapBufferRangeFlushDuration>
383 {
384         typedef MapFlushCallStatistics StatsType; // statistics struct used for this sample type
385
386         enum { HAS_MAP_STATS            = 1     };
387         enum { HAS_UNMAP_STATS          = 1     };
388         enum { HAS_WRITE_STATS          = 1     };
389         enum { HAS_FLUSH_STATS          = 1     };
390         enum { HAS_ALLOC_STATS          = 1     };
391         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
392 };
393
// Traits for MapBufferRangeFlushDurationNoAlloc: same as
// MapBufferRangeFlushDuration except HAS_ALLOC_STATS is 0, matching the missing
// allocDuration field. MapFlushCallStatistics is still the statistics type.
394 template <>
395 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
396 {
397         typedef MapFlushCallStatistics StatsType; // statistics struct used for this sample type
398
399         enum { HAS_MAP_STATS            = 1     };
400         enum { HAS_UNMAP_STATS          = 1     };
401         enum { HAS_WRITE_STATS          = 1     };
402         enum { HAS_FLUSH_STATS          = 1     };
403         enum { HAS_ALLOC_STATS          = 0     };
404         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
405 };
406
// Traits for RenderReadDuration: render, read and total flags are 1; there is
// no upload field and no first/second render split. Unlike the
// upload-render-read specializations, this one does not declare
// LOG_UNRELATED_UPLOAD_SIZE.
407 template <>
408 struct SampleTypeTraits<RenderReadDuration>
409 {
410         typedef RenderReadStatistics StatsType; // statistics struct used for this sample type
411
412         enum { HAS_RENDER_STATS                 = 1     };
413         enum { HAS_READ_STATS                   = 1     };
414         enum { HAS_UPLOAD_STATS                 = 0     };
415         enum { HAS_TOTAL_STATS                  = 1     };
416         enum { HAS_FIRST_RENDER_STATS   = 0     };
417         enum { HAS_SECOND_RENDER_STATS  = 0     };
418
419         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
420 };
421
// Traits for UnrelatedUploadRenderReadDuration: identical flag values and
// statistics type to the RenderReadDuration specialization. Like that
// specialization, it does not declare LOG_UNRELATED_UPLOAD_SIZE.
422 template <>
423 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
424 {
425         typedef RenderReadStatistics StatsType; // statistics struct used for this sample type
426
427         enum { HAS_RENDER_STATS                 = 1     };
428         enum { HAS_READ_STATS                   = 1     };
429         enum { HAS_UPLOAD_STATS                 = 0     };
430         enum { HAS_TOTAL_STATS                  = 1     };
431         enum { HAS_FIRST_RENDER_STATS   = 0     };
432         enum { HAS_SECOND_RENDER_STATS  = 0     };
433
434         enum { LOG_CONTRIBUTIONS        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
435 };
436
// Traits for UploadRenderReadDuration: upload, render, read and total flags are
// 1; there is no first/second render split.
437 template <>
438 struct SampleTypeTraits<UploadRenderReadDuration>
439 {
440         typedef UploadRenderReadStatistics StatsType; // statistics struct used for this sample type
441
442         enum { HAS_RENDER_STATS                 = 1     };
443         enum { HAS_READ_STATS                   = 1     };
444         enum { HAS_UPLOAD_STATS                 = 1     };
445         enum { HAS_TOTAL_STATS                  = 1     };
446         enum { HAS_FIRST_RENDER_STATS   = 0     };
447         enum { HAS_SECOND_RENDER_STATS  = 0     };
448
449         enum { LOG_CONTRIBUTIONS                        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
450         enum { LOG_UNRELATED_UPLOAD_SIZE        = 0 }; // consumed outside this chunk; presumably controls logging of unrelatedDataSize -- TODO confirm
451 };
452
// Traits for UploadRenderReadDurationWithUnrelatedUploadSize: identical to the
// UploadRenderReadDuration specialization except LOG_UNRELATED_UPLOAD_SIZE is 1.
453 template <>
454 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
455 {
456         typedef UploadRenderReadStatistics StatsType; // statistics struct used for this sample type
457
458         enum { HAS_RENDER_STATS                 = 1     };
459         enum { HAS_READ_STATS                   = 1     };
460         enum { HAS_UPLOAD_STATS                 = 1     };
461         enum { HAS_TOTAL_STATS                  = 1     };
462         enum { HAS_FIRST_RENDER_STATS   = 0     };
463         enum { HAS_SECOND_RENDER_STATS  = 0     };
464
465         enum { LOG_CONTRIBUTIONS                        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
466         enum { LOG_UNRELATED_UPLOAD_SIZE        = 1 }; // consumed outside this chunk; presumably controls logging of unrelatedDataSize -- TODO confirm
467 };
468
// Traits for RenderUploadRenderReadDuration: HAS_RENDER_STATS is 0 because the
// sample type has firstRenderDuration/secondRenderDuration instead of a single
// renderDuration; the first/second render flags are 1 accordingly.
469 template <>
470 struct SampleTypeTraits<RenderUploadRenderReadDuration>
471 {
472         typedef RenderUploadRenderReadStatistics StatsType; // statistics struct used for this sample type
473
474         enum { HAS_RENDER_STATS                 = 0     };
475         enum { HAS_READ_STATS                   = 1     };
476         enum { HAS_UPLOAD_STATS                 = 1     };
477         enum { HAS_TOTAL_STATS                  = 1     };
478         enum { HAS_FIRST_RENDER_STATS   = 1     };
479         enum { HAS_SECOND_RENDER_STATS  = 1     };
480
481         enum { LOG_CONTRIBUTIONS                        = 1     }; // consumed outside this chunk; presumably enables per-phase contribution logging -- TODO confirm
482         enum { LOG_UNRELATED_UPLOAD_SIZE        = 1 }; // consumed outside this chunk; presumably controls logging of unrelatedDataSize -- TODO confirm
483 };
484
// Transfer-rate summary of an upload sample analysis. The code that computes
// the values is outside this chunk; units and exact definitions should be
// confirmed there.
485 struct UploadSampleAnalyzeResult
486 {
487         float transferRateMedian; // presumably the median per-sample rate -- TODO confirm
488         float transferRateAtRange; // presumably the fitted rate at the largest tested size -- TODO confirm
489         float transferRateAtInfinity; // presumably the asymptotic rate implied by the fit -- TODO confirm
490 };
491
// Render-rate summary of a render sample analysis. The code that computes the
// values is outside this chunk; units and exact definitions should be
// confirmed there.
492 struct RenderSampleAnalyzeResult
493 {
494         float renderRateMedian; // presumably the median per-sample rate -- TODO confirm
495         float renderRateAtRange; // presumably the fitted rate at the largest tested size -- TODO confirm
496         float renderRateAtInfinity; // presumably the asymptotic rate implied by the fit -- TODO confirm
497 };
498
// Exception type that carries no data beyond std::exception. It is thrown and
// caught outside this chunk; judging by the name it signals a failed buffer
// unmap -- TODO confirm at the throw site.
499 class UnmapFailureError : public std::exception
500 {
501 public:
502         UnmapFailureError (void) : std::exception() {}
503 };
504
505 static std::string getHumanReadableByteSize (int numBytes)
506 {
507         std::ostringstream buf;
508
509         if (numBytes < 1024)
510                 buf << numBytes << " byte(s)";
511         else if (numBytes < 1024 * 1024)
512                 buf << de::floatToString((float)numBytes/1024.0f, 1) << " KiB";
513         else
514                 buf << de::floatToString((float)numBytes/1024.0f/1024.0f, 1) << " MiB";
515
516         return buf.str();
517 }
518
519 static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
520 {
521         // Time used by memcpy is assumed to be asymptotically linear
522
523         // With large numBytes, the probability of context switch or other random
524         // event is high. Apply memcpy in parts and report how much time would
525         // memcpy have used with the median transfer rate.
526
527         // Less than 1MiB, no need to do anything special
528         if (numBytes < 1048576)
529         {
530                 deUint64 startTime;
531                 deUint64 endTime;
532
533                 deYield();
534
535                 startTime = deGetMicroseconds();
536                 deMemcpy(dst, src, numBytes);
537                 endTime = deGetMicroseconds();
538
539                 return endTime - startTime;
540         }
541         else
542         {
543                 // Do memcpy in multiple parts
544
545                 const int       numSections             = 5;
546                 const int       sectionAlign    = 16;
547
548                 int                     sectionStarts[numSections+1];
549                 int                     sectionLens[numSections];
550                 deUint64        sectionTimes[numSections];
551                 deUint64        medianTime;
552                 deUint64        bestTime                = 0;
553
554                 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
555                         sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
556                 sectionStarts[numSections] = numBytes;
557
558                 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
559                         sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];
560
561                 // Memcpy is usually called after mapbuffer range which may take
562                 // a lot of time. To prevent power management from kicking in during
563                 // copy, warm up more.
564                 {
565                         deYield();
566                         tcu::warmupCPU();
567                         deYield();
568                 }
569
570                 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
571                 {
572                         deUint64 startTime;
573                         deUint64 endTime;
574
575                         startTime = deGetMicroseconds();
576                         deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
577                         endTime = deGetMicroseconds();
578
579                         sectionTimes[sectionNdx] = endTime - startTime;
580
581                         if (!bestTime || sectionTimes[sectionNdx] < bestTime)
582                                 bestTime = sectionTimes[sectionNdx];
583
584                         // Detect if write takes 50% longer than it should, and warm up if that happened
585                         if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime)
586                         {
587                                 deYield();
588                                 tcu::warmupCPU();
589                                 deYield();
590                         }
591                 }
592
593                 std::sort(sectionTimes, sectionTimes + numSections);
594
595                 if ((numSections % 2) == 0)
596                         medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
597                 else
598                         medianTime = sectionTimes[numSections / 2];
599
600                 return medianTime*numSections;
601         }
602 }
603
604 static float dummyCalculation (float initial, int workSize)
605 {
606         float   a = initial;
607         int             b = 123;
608
609         for (int ndx = 0; ndx < workSize; ++ndx)
610         {
611                 a = deFloatCos(a + (float)b);
612                 b = (b + 63) % 107 + de::abs((int)(a*10.0f));
613         }
614
615         return a + (float)b;
616 }
617
618 static void busyWait (int microseconds)
619 {
620         const deUint64  maxSingleWaitTime       = 1000; // 1ms
621         const deUint64  endTime                         = deGetMicroseconds() + microseconds;
622         float                   dummy                           = *tcu::warmupCPUInternal::g_dummy.m_v;
623         int                             workSize                        = 500;
624
625         // exponentially increase work, cap to 1ms
626         while (deGetMicroseconds() < endTime)
627         {
628                 const deUint64  startTime               = deGetMicroseconds();
629                 deUint64                totalTime;
630
631                 dummy = dummyCalculation(dummy, workSize);
632
633                 totalTime = deGetMicroseconds() - startTime;
634
635                 if (totalTime >= maxSingleWaitTime)
636                         break;
637                 else
638                         workSize *= 2;
639         }
640
641         // "wait"
642         while (deGetMicroseconds() < endTime)
643                 dummy = dummyCalculation(dummy, workSize);
644
645         *tcu::warmupCPUInternal::g_dummy.m_v = dummy;
646 }
647
648 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
649 template <typename T>
650 static float linearSample (const std::vector<T>& values, float position)
651 {
652         DE_ASSERT(position >= 0.0f);
653         DE_ASSERT(position <= 1.0f);
654
655         const float     floatNdx                        = (float)(values.size() - 1) * position;
656         const int       lowerNdx                        = (int)deFloatFloor(floatNdx);
657         const int       higherNdx                       = lowerNdx + 1;
658         const float     interpolationFactor = floatNdx - (float)lowerNdx;
659
660         DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
661         DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
662         DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
663
664         return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
665 }
666
667 template <typename T>
668 SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
669 {
670         SingleOperationStatistics       stats;
671         std::vector<deUint64>           values(samples.size());
672
673         for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
674                 values[ndx] = samples[ndx].duration.*target;
675
676         std::sort(values.begin(), values.end());
677
678         stats.minTime                   = (float)values.front();
679         stats.maxTime                   = (float)values.back();
680         stats.medianTime                = linearSample(values, 0.5f);
681         stats.min2DecileTime    = linearSample(values, 0.1f);
682         stats.max9DecileTime    = linearSample(values, 0.9f);
683
684         return stats;
685 }
686
687 template <typename StatisticsType, typename SampleType>
688 void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
689 {
690         std::vector<deUint64> values(samples.size());
691
692         for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
693                 values[ndx] = samples[ndx].duration.fitResponseDuration;
694
695         // median rate
696         {
697                 std::vector<float> processingRates(samples.size());
698
699                 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
700                 {
701                         const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f;
702                         processingRates[ndx] = (float)(samples[ndx].*predictor) / timeInSeconds;
703                 }
704
705                 std::sort(processingRates.begin(), processingRates.end());
706
707                 stats.medianRate = linearSample(processingRates, 0.5f);
708         }
709
710         // results compared to the approximation
711         {
712                 std::vector<float> timeDiffs(samples.size());
713
714                 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
715                 {
716                         const float prediction  = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
717                         const float actual              = (float)values[ndx];
718                         timeDiffs[ndx] = actual - prediction;
719                 }
720                 std::sort(timeDiffs.begin(), timeDiffs.end());
721
722                 stats.maxDiffTime                       = timeDiffs.back();
723                 stats.maxDiff9DecileTime        = linearSample(timeDiffs, 0.9f);
724                 stats.medianDiffTime            = linearSample(timeDiffs, 0.5f);
725         }
726
727         // relative comparison to the approximation
728         {
729                 std::vector<float> relativeDiffs(samples.size());
730
731                 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
732                 {
733                         const float prediction  = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
734                         const float actual              = (float)values[ndx];
735
736                         // Ignore cases where we predict negative times, or if
737                         // ratio would be (nearly) infinite: ignore if predicted
738                         // time is less than 1 microsecond
739                         if (prediction < 1.0f)
740                                 relativeDiffs[ndx] = 0.0f;
741                         else
742                                 relativeDiffs[ndx] = (actual - prediction) / prediction;
743                 }
744                 std::sort(relativeDiffs.begin(), relativeDiffs.end());
745
746                 stats.maxRelDiffTime            = relativeDiffs.back();
747                 stats.max9DecileRelDiffTime     = linearSample(relativeDiffs, 0.9f);
748                 stats.medianRelDiffTime         = linearSample(relativeDiffs, 0.5f);
749         }
750
751         // values calculated using sorted timings
752
753         std::sort(values.begin(), values.end());
754
755         stats.result.minTime = (float)values.front();
756         stats.result.maxTime = (float)values.back();
757         stats.result.medianTime = linearSample(values, 0.5f);
758         stats.result.min2DecileTime = linearSample(values, 0.1f);
759         stats.result.max9DecileTime = linearSample(values, 0.9f);
760 }
761
762 template <typename StatisticsType, typename SampleType>
763 void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
764 {
765         calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
766 }
767
768 template <typename StatisticsType, typename SampleType>
769 void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
770 {
771         calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
772 }
773
774 static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
775 {
776         SingleCallStatistics stats;
777
778         calculateBasicTransferStatistics(stats, fit, samples);
779
780         return stats;
781 }
782
783 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
784 {
785         MapCallStatistics stats;
786
787         calculateBasicTransferStatistics(stats, fit, samples);
788
789         stats.map       = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
790         stats.unmap     = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
791         stats.write     = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
792         stats.alloc     = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
793
794         return stats;
795 }
796
797 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
798 {
799         MapFlushCallStatistics stats;
800
801         calculateBasicTransferStatistics(stats, fit, samples);
802
803         stats.map       = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
804         stats.unmap     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
805         stats.write     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
806         stats.flush     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
807         stats.alloc     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
808
809         return stats;
810 }
811
812 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
813 {
814         MapCallStatistics stats;
815
816         calculateBasicTransferStatistics(stats, fit, samples);
817
818         stats.map       = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
819         stats.unmap     = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
820         stats.write     = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
821
822         return stats;
823 }
824
825 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
826 {
827         MapFlushCallStatistics stats;
828
829         calculateBasicTransferStatistics(stats, fit, samples);
830
831         stats.map       = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
832         stats.unmap     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
833         stats.write     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
834         stats.flush     = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
835
836         return stats;
837 }
838
839 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
840 {
841         RenderReadStatistics stats;
842
843         calculateBasicRenderStatistics(stats, fit, samples);
844
845         stats.render    = calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
846         stats.read              = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
847         stats.total             = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
848
849         return stats;
850 }
851
852 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
853 {
854         RenderReadStatistics stats;
855
856         calculateBasicRenderStatistics(stats, fit, samples);
857
858         stats.render    = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
859         stats.read              = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
860         stats.total             = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
861
862         return stats;
863 }
864
865 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
866 {
867         UploadRenderReadStatistics stats;
868
869         calculateBasicRenderStatistics(stats, fit, samples);
870
871         stats.upload    = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
872         stats.render    = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
873         stats.read              = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
874         stats.total             = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
875
876         return stats;
877 }
878
879 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
880 {
881         UploadRenderReadStatistics stats;
882
883         calculateBasicRenderStatistics(stats, fit, samples);
884
885         stats.upload    = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
886         stats.render    = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
887         stats.read              = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
888         stats.total             = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
889
890         return stats;
891 }
892
893 static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
894 {
895         RenderUploadRenderReadStatistics stats;
896
897         calculateBasicRenderStatistics(stats, fit, samples);
898
899         stats.firstRender       = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
900         stats.upload            = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
901         stats.secondRender      = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
902         stats.read                      = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
903         stats.total                     = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
904
905         return stats;
906 }
907
908 template <typename DurationType>
909 static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
910 {
911         std::vector<tcu::Vec2> samplePoints;
912
913         for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
914         {
915                 tcu::Vec2 point;
916
917                 point.x() = (float)(samples[sampleNdx].writtenSize);
918                 point.y() = (float)(samples[sampleNdx].duration.*target);
919
920                 samplePoints.push_back(point);
921         }
922
923         return theilSenSiegelLinearRegression(samplePoints, 0.6f);
924 }
925
926 template <typename DurationType>
927 static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
928 {
929         std::vector<tcu::Vec2> samplePoints;
930
931         for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
932         {
933                 tcu::Vec2 point;
934
935                 point.x() = (float)(samples[sampleNdx].renderDataSize);
936                 point.y() = (float)(samples[sampleNdx].duration.*target);
937
938                 samplePoints.push_back(point);
939         }
940
941         return theilSenSiegelLinearRegression(samplePoints, 0.6f);
942 }
943
944 template <typename T>
945 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
946 {
947         return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
948 }
949
950 template <typename T>
951 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
952 {
953         return fitLineToSamples(samples, 0, (int)samples.size(), target);
954 }
955
// Returns the area enclosed between two lines over the x range [xmin, xmax].
//
// Line A is y = lineAOffset + lineACoefficient * x, line B likewise.
//
// - If the lines do not cross inside the range, the region between them is a
//   trapezoid whose area is width * |A(mid) - B(mid)| (sign follows the sign of
//   xmax - xmin, as before).
// - If the lines cross inside the range, the region consists of two triangles
//   that share the intersection point. With L and R the vertical gaps at xmin
//   and xmax, similar triangles give the triangle bases as width * L / (L + R)
//   and width * R / (L + R), so the area is 0.5 * width * (L^2 + R^2) / (L + R).
//
// The crossing case used to divide by the slope difference and returned 0 when
// that difference was below a fixed 1e-4. Slopes here are time-per-byte values
// and are frequently smaller than that, which made nearly parallel but clearly
// distinct crossing lines report zero area. The formulation below is
// mathematically the same for larger slope differences and needs no epsilon:
// in the crossing branch one endpoint comparison is strictly "greater than", so
// L + R is always positive.
static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
{
	const float	lineAMin		= lineAOffset + lineACoefficient * xmin;
	const float	lineAMax		= lineAOffset + lineACoefficient * xmax;
	const float	lineBMin		= lineBOffset + lineBCoefficient * xmin;
	const float	lineBMax		= lineBOffset + lineBCoefficient * xmax;
	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
	const bool	aOverBAtEnd		= (lineAMax > lineBMax);
	const float	width			= (xmax - xmin);

	if (aOverBAtBegin == aOverBAtEnd)
	{
		// lines do not intersect

		const float midpoint	= (xmin + xmax) / 2.0f;

		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
		const float heightDiff	= lineAHeight - lineBHeight;

		return width * ((heightDiff < 0.0f) ? (-heightDiff) : (heightDiff));
	}
	else
	{
		// lines intersect

		const float leftDiff	= lineAMin - lineBMin;
		const float rightDiff	= lineAMax - lineBMax;
		const float leftHeight	= (leftDiff < 0.0f) ? (-leftDiff) : (leftDiff);
		const float rightHeight	= (rightDiff < 0.0f) ? (-rightDiff) : (rightDiff);
		const float totalHeight	= leftHeight + rightHeight;
		const float absWidth	= (width < 0.0f) ? (-width) : (width);

		// Dividing before multiplying keeps the intermediate values bounded by
		// the heights themselves instead of their squares.
		return 0.5f * absWidth * (leftHeight * (leftHeight / totalHeight) + rightHeight * (rightHeight / totalHeight));
	}
}
993
994 template <typename T>
995 static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
996 {
997         // Compare the fitted line of first half of the samples to the fitted line of
998         // the second half of the samples. Calculate a AABB that fully contains every
999         // sample's x component and both fit lines in this range. Calculate the ratio
1000         // of the area between the lines and the AABB.
1001
1002         const float                             epsilon                         = 1.e-6f;
1003         const int                               midPoint                        = (int)samples.size() / 2;
1004         const LineParametersWithConfidence      startApproximation      = fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1005         const LineParametersWithConfidence      endApproximation        = fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1006
1007         const float                             aabbMinX                        = (float)(samples.front().*predictor);
1008         const float                             aabbMinY                        = de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
1009         const float                             aabbMaxX                        = (float)(samples.back().*predictor);
1010         const float                             aabbMaxY                        = de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);
1011
1012         const float                             aabbArea                        = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1013         const float                             areaBetweenLines        = getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
1014         const float                             errorAreaRatio          = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1015
1016         return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1017 }
1018
1019 template <typename DurationType>
1020 static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
1021 {
1022         return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1023 }
1024
1025 template <typename DurationType>
1026 static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
1027 {
1028         return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1029 }
1030
1031 template <typename T>
1032 static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
1033 {
1034         // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1035         // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1036         // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1037         // the lines and the AABB.
1038
1039         const float                             epsilon                         = 1.e-6f;
1040         const LineParametersWithConfidence      evenApproximation       = fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1041         const LineParametersWithConfidence      oddApproximation        = fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1042
1043         const float                             aabbMinX                        = (float)(samples.front().*predictor);
1044         const float                             aabbMinY                        = de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
1045         const float                             aabbMaxX                        = (float)(samples.back().*predictor);
1046         const float                             aabbMaxY                        = de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);
1047
1048         const float                             aabbArea                        = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1049         const float                             areaBetweenLines        = getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
1050         const float                             errorAreaRatio          = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1051
1052         return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1053 }
1054
1055 template <typename DurationType>
1056 static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
1057 {
1058         return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1059 }
1060
1061 template <typename DurationType>
1062 static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
1063 {
1064         return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1065 }
1066
1067 template <typename DurationType>
1068 static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
1069 {
1070         minBufferSize = 0;
1071         maxBufferSize = 0;
1072
1073         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1074         {
1075                 DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1076
1077                 if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1078                         minBufferSize = samples[sampleNdx].allocatedSize;
1079                 if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1080                         maxBufferSize = samples[sampleNdx].allocatedSize;
1081         }
1082
1083         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1084         {
1085                 const float bucketNdxFloat      = (float)(samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * (float)numBuckets;
1086                 const int bucketNdx                     = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
1087
1088                 buckets[bucketNdx].push_back(samples[sampleNdx]);
1089         }
1090 }
1091
1092 template <typename SampleType>
1093 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1094 {
1095         log     << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1096                 << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1097                 << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime)
1098                 << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime)
1099                 << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1100 }
1101
1102 template <typename SampleType>
1103 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1104 {
1105         log     << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1106                 << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1107                 << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1108                 << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1109                 << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1110 }
1111
1112 template <typename SampleType>
1113 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1114 {
1115         log     << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1116                 << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1117                 << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1118                 << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1119                 << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1120 }
1121
1122 template <typename SampleType>
1123 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1124 {
1125         log     << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1126                 << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1127                 << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1128                 << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1129                 << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1130 }
1131
1132 template <typename SampleType>
1133 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1134 {
1135         log     << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1136                 << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1137                 << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1138                 << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1139                 << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1140 }
1141
1142 template <typename SampleType>
1143 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1144 {
1145         DE_UNREF(log);
1146         DE_UNREF(stats);
1147 }
1148
1149 template <typename SampleType>
1150 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1151 {
1152         DE_UNREF(log);
1153         DE_UNREF(stats);
1154 }
1155
1156 template <typename SampleType>
1157 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1158 {
1159         DE_UNREF(log);
1160         DE_UNREF(stats);
1161 }
1162
1163 template <typename SampleType>
1164 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1165 {
1166         DE_UNREF(log);
1167         DE_UNREF(stats);
1168 }
1169
1170 template <typename SampleType>
1171 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1172 {
1173         DE_UNREF(log);
1174         DE_UNREF(stats);
1175 }
1176
1177 template <typename SampleType>
1178 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1179 {
1180         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1181         log     << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1182                 << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1183                 << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1184 }
1185
1186 template <typename SampleType>
1187 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1188 {
1189         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1190         log     << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1191                 << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1192                 << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1193 }
1194
1195 template <typename SampleType>
1196 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1197 {
1198         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1199         log     << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1200                 << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1201                 << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1202 }
1203
1204 template <typename SampleType>
1205 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1206 {
1207         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1208         log     << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1209                 << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1210                 << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1211 }
1212
1213 template <typename SampleType>
1214 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1215 {
1216         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1217         log     << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1218                 << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1219                 << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1220 }
1221
1222 template <typename SampleType>
1223 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1224 {
1225         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1226         log     << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1227                 << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1228                 << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
1229 }
1230
1231 template <typename SampleType>
1232 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1233 {
1234         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1235         log     << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1236                 << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1237                 << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1238 }
1239
1240 template <typename SampleType>
1241 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1242 {
1243         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1244         log     << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1245                 << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1246                 << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
1247 }
1248
1249 template <typename SampleType>
1250 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1251 {
1252         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1253         log     << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1254                 << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1255                 << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1256 }
1257
1258 template <typename SampleType>
1259 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1260 {
1261         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration);
1262         log     << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1263                 << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1264                 << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
1265 }
1266
1267 template <typename SampleType>
1268 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1269 {
1270         const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration);
1271         log     << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1272                 << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1273                 << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
1274 }
1275
1276 template <typename SampleType>
1277 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1278 {
1279         DE_UNREF(log);
1280         DE_UNREF(samples);
1281         DE_UNREF(stats);
1282 }
1283
1284 template <typename SampleType>
1285 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1286 {
1287         DE_UNREF(log);
1288         DE_UNREF(samples);
1289         DE_UNREF(stats);
1290 }
1291
1292 template <typename SampleType>
1293 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1294 {
1295         DE_UNREF(log);
1296         DE_UNREF(samples);
1297         DE_UNREF(stats);
1298 }
1299
1300 template <typename SampleType>
1301 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1302 {
1303         DE_UNREF(log);
1304         DE_UNREF(samples);
1305         DE_UNREF(stats);
1306 }
1307
1308 template <typename SampleType>
1309 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1310 {
1311         DE_UNREF(log);
1312         DE_UNREF(samples);
1313         DE_UNREF(stats);
1314 }
1315
1316 template <typename SampleType>
1317 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1318 {
1319         DE_UNREF(log);
1320         DE_UNREF(samples);
1321         DE_UNREF(stats);
1322 }
1323
1324 template <typename SampleType>
1325 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1326 {
1327         DE_UNREF(log);
1328         DE_UNREF(samples);
1329         DE_UNREF(stats);
1330 }
1331
1332 template <typename SampleType>
1333 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1334 {
1335         DE_UNREF(log);
1336         DE_UNREF(samples);
1337         DE_UNREF(stats);
1338 }
1339
1340 template <typename SampleType>
1341 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1342 {
1343         DE_UNREF(log);
1344         DE_UNREF(samples);
1345         DE_UNREF(stats);
1346 }
1347
1348 template <typename SampleType>
1349 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1350 {
1351         DE_UNREF(log);
1352         DE_UNREF(samples);
1353         DE_UNREF(stats);
1354 }
1355
1356 template <typename SampleType>
1357 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1358 {
1359         DE_UNREF(log);
1360         DE_UNREF(samples);
1361         DE_UNREF(stats);
1362 }
1363
1364 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
1365 {
1366         log << tcu::TestLog::SampleList("Samples", "Samples")
1367                 << tcu::TestLog::SampleInfo
1368                 << tcu::TestLog::ValueInfo("WrittenSize",               "Written size",                 "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1369                 << tcu::TestLog::ValueInfo("BufferSize",                "Buffer size",                  "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1370                 << tcu::TestLog::ValueInfo("UploadTime",                "Upload time",                  "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1371                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1372                 << tcu::TestLog::EndSampleInfo;
1373
1374         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1375         {
1376                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1377                 log     << tcu::TestLog::Sample
1378                         << samples[sampleNdx].writtenSize
1379                         << samples[sampleNdx].bufferSize
1380                         << (int)samples[sampleNdx].duration.totalDuration
1381                         << fitResidual
1382                         << tcu::TestLog::EndSample;
1383         }
1384
1385         log << tcu::TestLog::EndSampleList;
1386 }
1387
1388 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
1389 {
1390         log << tcu::TestLog::SampleList("Samples", "Samples")
1391                 << tcu::TestLog::SampleInfo
1392                 << tcu::TestLog::ValueInfo("WrittenSize",               "Written size",                 "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1393                 << tcu::TestLog::ValueInfo("BufferSize",                "Buffer size",                  "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1394                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1395                 << tcu::TestLog::ValueInfo("AllocTime",                 "Alloc time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1396                 << tcu::TestLog::ValueInfo("MapTime",                   "Map time",                             "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1397                 << tcu::TestLog::ValueInfo("UnmapTime",                 "Unmap time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1398                 << tcu::TestLog::ValueInfo("WriteTime",                 "Write time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1399                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1400                 << tcu::TestLog::EndSampleInfo;
1401
1402         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1403         {
1404                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1405                 log     << tcu::TestLog::Sample
1406                         << samples[sampleNdx].writtenSize
1407                         << samples[sampleNdx].bufferSize
1408                         << (int)samples[sampleNdx].duration.totalDuration
1409                         << (int)samples[sampleNdx].duration.allocDuration
1410                         << (int)samples[sampleNdx].duration.mapDuration
1411                         << (int)samples[sampleNdx].duration.unmapDuration
1412                         << (int)samples[sampleNdx].duration.writeDuration
1413                         << fitResidual
1414                         << tcu::TestLog::EndSample;
1415         }
1416
1417         log << tcu::TestLog::EndSampleList;
1418 }
1419
1420 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
1421 {
1422         log << tcu::TestLog::SampleList("Samples", "Samples")
1423                 << tcu::TestLog::SampleInfo
1424                 << tcu::TestLog::ValueInfo("WrittenSize",               "Written size",                 "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1425                 << tcu::TestLog::ValueInfo("BufferSize",                "Buffer size",                  "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1426                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1427                 << tcu::TestLog::ValueInfo("MapTime",                   "Map time",                             "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1428                 << tcu::TestLog::ValueInfo("UnmapTime",                 "Unmap time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1429                 << tcu::TestLog::ValueInfo("WriteTime",                 "Write time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1430                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1431                 << tcu::TestLog::EndSampleInfo;
1432
1433         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1434         {
1435                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1436                 log     << tcu::TestLog::Sample
1437                         << samples[sampleNdx].writtenSize
1438                         << samples[sampleNdx].bufferSize
1439                         << (int)samples[sampleNdx].duration.totalDuration
1440                         << (int)samples[sampleNdx].duration.mapDuration
1441                         << (int)samples[sampleNdx].duration.unmapDuration
1442                         << (int)samples[sampleNdx].duration.writeDuration
1443                         << fitResidual
1444                         << tcu::TestLog::EndSample;
1445         }
1446
1447         log << tcu::TestLog::EndSampleList;
1448 }
1449
1450 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
1451 {
1452         log << tcu::TestLog::SampleList("Samples", "Samples")
1453                 << tcu::TestLog::SampleInfo
1454                 << tcu::TestLog::ValueInfo("WrittenSize",               "Written size",                 "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1455                 << tcu::TestLog::ValueInfo("BufferSize",                "Buffer size",                  "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1456                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1457                 << tcu::TestLog::ValueInfo("AllocTime",                 "Alloc time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1458                 << tcu::TestLog::ValueInfo("MapTime",                   "Map time",                             "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1459                 << tcu::TestLog::ValueInfo("UnmapTime",                 "Unmap time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1460                 << tcu::TestLog::ValueInfo("WriteTime",                 "Write time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1461                 << tcu::TestLog::ValueInfo("FlushTime",                 "Flush time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1462                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1463                 << tcu::TestLog::EndSampleInfo;
1464
1465         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1466         {
1467                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1468                 log     << tcu::TestLog::Sample
1469                         << samples[sampleNdx].writtenSize
1470                         << samples[sampleNdx].bufferSize
1471                         << (int)samples[sampleNdx].duration.totalDuration
1472                         << (int)samples[sampleNdx].duration.allocDuration
1473                         << (int)samples[sampleNdx].duration.mapDuration
1474                         << (int)samples[sampleNdx].duration.unmapDuration
1475                         << (int)samples[sampleNdx].duration.writeDuration
1476                         << (int)samples[sampleNdx].duration.flushDuration
1477                         << fitResidual
1478                         << tcu::TestLog::EndSample;
1479         }
1480
1481         log << tcu::TestLog::EndSampleList;
1482 }
1483
1484 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
1485 {
1486         log << tcu::TestLog::SampleList("Samples", "Samples")
1487                 << tcu::TestLog::SampleInfo
1488                 << tcu::TestLog::ValueInfo("WrittenSize",               "Written size",                 "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1489                 << tcu::TestLog::ValueInfo("BufferSize",                "Buffer size",                  "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1490                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1491                 << tcu::TestLog::ValueInfo("MapTime",                   "Map time",                             "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1492                 << tcu::TestLog::ValueInfo("UnmapTime",                 "Unmap time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1493                 << tcu::TestLog::ValueInfo("WriteTime",                 "Write time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1494                 << tcu::TestLog::ValueInfo("FlushTime",                 "Flush time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1495                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1496                 << tcu::TestLog::EndSampleInfo;
1497
1498         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1499         {
1500                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1501                 log     << tcu::TestLog::Sample
1502                         << samples[sampleNdx].writtenSize
1503                         << samples[sampleNdx].bufferSize
1504                         << (int)samples[sampleNdx].duration.totalDuration
1505                         << (int)samples[sampleNdx].duration.mapDuration
1506                         << (int)samples[sampleNdx].duration.unmapDuration
1507                         << (int)samples[sampleNdx].duration.writeDuration
1508                         << (int)samples[sampleNdx].duration.flushDuration
1509                         << fitResidual
1510                         << tcu::TestLog::EndSample;
1511         }
1512
1513         log << tcu::TestLog::EndSampleList;
1514 }
1515
1516 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
1517 {
1518         log << tcu::TestLog::SampleList("Samples", "Samples")
1519                 << tcu::TestLog::SampleInfo
1520                 << tcu::TestLog::ValueInfo("DataSize",                  "Data processed",               "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1521                 << tcu::TestLog::ValueInfo("VertexCount",               "Number of vertices",   "vertices",     QP_SAMPLE_VALUE_TAG_PREDICTOR)
1522                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1523                 << tcu::TestLog::ValueInfo("DrawCallTime",              "Draw call time",               "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1524                 << tcu::TestLog::ValueInfo("ReadTime",                  "ReadPixels time",              "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1525                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1526                 << tcu::TestLog::EndSampleInfo;
1527
1528         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1529         {
1530                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1531                 log     << tcu::TestLog::Sample
1532                         << samples[sampleNdx].renderDataSize
1533                         << samples[sampleNdx].numVertices
1534                         << (int)samples[sampleNdx].duration.renderReadDuration
1535                         << (int)samples[sampleNdx].duration.renderDuration
1536                         << (int)samples[sampleNdx].duration.readDuration
1537                         << fitResidual
1538                         << tcu::TestLog::EndSample;
1539         }
1540
1541         log << tcu::TestLog::EndSampleList;
1542 }
1543
1544 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
1545 {
1546         log << tcu::TestLog::SampleList("Samples", "Samples")
1547                 << tcu::TestLog::SampleInfo
1548                 << tcu::TestLog::ValueInfo("DataSize",                          "Data processed",                       "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1549                 << tcu::TestLog::ValueInfo("VertexCount",                       "Number of vertices",           "vertices",     QP_SAMPLE_VALUE_TAG_PREDICTOR)
1550                 << tcu::TestLog::ValueInfo("UnrelatedUploadSize",       "Unrelated upload size",        "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1551                 << tcu::TestLog::ValueInfo("TotalTime",                         "Total time",                           "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1552                 << tcu::TestLog::ValueInfo("DrawCallTime",                      "Draw call time",                       "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1553                 << tcu::TestLog::ValueInfo("ReadTime",                          "ReadPixels time",                      "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1554                 << tcu::TestLog::ValueInfo("FitResidual",                       "Fit residual",                         "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1555                 << tcu::TestLog::EndSampleInfo;
1556
1557         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1558         {
1559                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1560                 log     << tcu::TestLog::Sample
1561                         << samples[sampleNdx].renderDataSize
1562                         << samples[sampleNdx].numVertices
1563                         << samples[sampleNdx].unrelatedDataSize
1564                         << (int)samples[sampleNdx].duration.renderReadDuration
1565                         << (int)samples[sampleNdx].duration.renderDuration
1566                         << (int)samples[sampleNdx].duration.readDuration
1567                         << fitResidual
1568                         << tcu::TestLog::EndSample;
1569         }
1570
1571         log << tcu::TestLog::EndSampleList;
1572 }
1573
1574 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
1575 {
1576         log << tcu::TestLog::SampleList("Samples", "Samples")
1577                 << tcu::TestLog::SampleInfo
1578                 << tcu::TestLog::ValueInfo("DataSize",                  "Data processed",                                       "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1579                 << tcu::TestLog::ValueInfo("UploadSize",                "Data uploaded",                                        "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1580                 << tcu::TestLog::ValueInfo("VertexCount",               "Number of vertices",                           "vertices",     QP_SAMPLE_VALUE_TAG_PREDICTOR)
1581                 << tcu::TestLog::ValueInfo("DrawReadTime",              "Draw call and ReadPixels time",        "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1582                 << tcu::TestLog::ValueInfo("TotalTime",                 "Total time",                                           "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1583                 << tcu::TestLog::ValueInfo("Upload time",               "Upload time",                                          "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1584                 << tcu::TestLog::ValueInfo("DrawCallTime",              "Draw call time",                                       "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1585                 << tcu::TestLog::ValueInfo("ReadTime",                  "ReadPixels time",                                      "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1586                 << tcu::TestLog::ValueInfo("FitResidual",               "Fit residual",                                         "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1587                 << tcu::TestLog::EndSampleInfo;
1588
1589         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1590         {
1591                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1592                 log     << tcu::TestLog::Sample
1593                         << samples[sampleNdx].renderDataSize
1594                         << samples[sampleNdx].uploadedDataSize
1595                         << samples[sampleNdx].numVertices
1596                         << (int)samples[sampleNdx].duration.renderReadDuration
1597                         << (int)samples[sampleNdx].duration.totalDuration
1598                         << (int)samples[sampleNdx].duration.uploadDuration
1599                         << (int)samples[sampleNdx].duration.renderDuration
1600                         << (int)samples[sampleNdx].duration.readDuration
1601                         << fitResidual
1602                         << tcu::TestLog::EndSample;
1603         }
1604
1605         log << tcu::TestLog::EndSampleList;
1606 }
1607
1608 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
1609 {
1610         log << tcu::TestLog::SampleList("Samples", "Samples")
1611                 << tcu::TestLog::SampleInfo
1612                 << tcu::TestLog::ValueInfo("DataSize",                          "Data processed",                                       "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1613                 << tcu::TestLog::ValueInfo("UploadSize",                        "Data uploaded",                                        "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1614                 << tcu::TestLog::ValueInfo("VertexCount",                       "Number of vertices",                           "vertices",     QP_SAMPLE_VALUE_TAG_PREDICTOR)
1615                 << tcu::TestLog::ValueInfo("UnrelatedUploadSize",       "Unrelated upload size",                        "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1616                 << tcu::TestLog::ValueInfo("DrawReadTime",                      "Draw call and ReadPixels time",        "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1617                 << tcu::TestLog::ValueInfo("TotalTime",                         "Total time",                                           "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1618                 << tcu::TestLog::ValueInfo("Upload time",                       "Upload time",                                          "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1619                 << tcu::TestLog::ValueInfo("DrawCallTime",                      "Draw call time",                                       "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1620                 << tcu::TestLog::ValueInfo("ReadTime",                          "ReadPixels time",                                      "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1621                 << tcu::TestLog::ValueInfo("FitResidual",                       "Fit residual",                                         "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1622                 << tcu::TestLog::EndSampleInfo;
1623
1624         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1625         {
1626                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1627                 log     << tcu::TestLog::Sample
1628                         << samples[sampleNdx].renderDataSize
1629                         << samples[sampleNdx].uploadedDataSize
1630                         << samples[sampleNdx].numVertices
1631                         << samples[sampleNdx].unrelatedDataSize
1632                         << (int)samples[sampleNdx].duration.renderReadDuration
1633                         << (int)samples[sampleNdx].duration.totalDuration
1634                         << (int)samples[sampleNdx].duration.uploadDuration
1635                         << (int)samples[sampleNdx].duration.renderDuration
1636                         << (int)samples[sampleNdx].duration.readDuration
1637                         << fitResidual
1638                         << tcu::TestLog::EndSample;
1639         }
1640
1641         log << tcu::TestLog::EndSampleList;
1642 }
1643
1644 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
1645 {
1646         log << tcu::TestLog::SampleList("Samples", "Samples")
1647                 << tcu::TestLog::SampleInfo
1648                 << tcu::TestLog::ValueInfo("DataSize",                          "Data processed",                                               "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1649                 << tcu::TestLog::ValueInfo("UploadSize",                        "Data uploaded",                                                "bytes",        QP_SAMPLE_VALUE_TAG_PREDICTOR)
1650                 << tcu::TestLog::ValueInfo("VertexCount",                       "Number of vertices",                                   "vertices",     QP_SAMPLE_VALUE_TAG_PREDICTOR)
1651                 << tcu::TestLog::ValueInfo("DrawReadTime",                      "Second draw call and ReadPixels time", "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1652                 << tcu::TestLog::ValueInfo("TotalTime",                         "Total time",                                                   "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1653                 << tcu::TestLog::ValueInfo("FirstDrawCallTime",         "First draw call time",                                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1654                 << tcu::TestLog::ValueInfo("Upload time",                       "Upload time",                                                  "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1655                 << tcu::TestLog::ValueInfo("SecondDrawCallTime",        "Second draw call time",                                "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1656                 << tcu::TestLog::ValueInfo("ReadTime",                          "ReadPixels time",                                              "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1657                 << tcu::TestLog::ValueInfo("FitResidual",                       "Fit residual",                                                 "us",           QP_SAMPLE_VALUE_TAG_RESPONSE)
1658                 << tcu::TestLog::EndSampleInfo;
1659
1660         for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1661         {
1662                 const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1663                 log     << tcu::TestLog::Sample
1664                         << samples[sampleNdx].renderDataSize
1665                         << samples[sampleNdx].uploadedDataSize
1666                         << samples[sampleNdx].numVertices
1667                         << (int)samples[sampleNdx].duration.renderReadDuration
1668                         << (int)samples[sampleNdx].duration.totalDuration
1669                         << (int)samples[sampleNdx].duration.firstRenderDuration
1670                         << (int)samples[sampleNdx].duration.uploadDuration
1671                         << (int)samples[sampleNdx].duration.secondRenderDuration
1672                         << (int)samples[sampleNdx].duration.readDuration
1673                         << fitResidual
1674                         << tcu::TestLog::EndSample;
1675         }
1676
1677         log << tcu::TestLog::EndSampleList;
1678 }
1679
1680 template <typename SampleType>
1681 static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
1682 {
1683         // Assume data is linear with some outliers, fit a line
1684         const LineParametersWithConfidence                                              theilSenFitting                                         = fitLineToSamples(samples);
1685         const typename SampleTypeTraits<SampleType>::StatsType  resultStats                                                     = calculateSampleStatistics(theilSenFitting, samples);
1686         float                                                                                                   approximatedTransferRate;
1687         float                                                                                                   approximatedTransferRateNoConstant;
1688
1689         // Output raw samples
1690         {
1691                 const tcu::ScopedLogSection     section(log, "Samples", "Samples");
1692                 logSampleList(log, theilSenFitting, samples);
1693         }
1694
1695         // Calculate results for different ranges
1696         if (logBucketPerformance)
1697         {
1698                 const int                                                                               numBuckets                              = 4;
1699                 int                                                                                             minBufferSize                   = 0;
1700                 int                                                                                             maxBufferSize                   = 0;
1701                 std::vector<UploadSampleResult<SampleType> >    buckets[numBuckets];
1702
1703                 bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
1704
1705                 for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
1706                 {
1707                         if (buckets[bucketNdx].empty())
1708                                 continue;
1709
1710                         // Print a nice result summary
1711
1712                         const int                                                                                               bucketRangeMin  = minBufferSize + (int)(((float) bucketNdx    / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
1713                         const int                                                                                               bucketRangeMax  = minBufferSize + (int)(((float)(bucketNdx+1) / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
1714                         const typename SampleTypeTraits<SampleType>::StatsType  stats                   = calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
1715                         const tcu::ScopedLogSection                                                             section                 (log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
1716
1717                         logMapRangeStats<SampleType>(log, stats);
1718                         logUnmapStats<SampleType>(log, stats);
1719                         logWriteStats<SampleType>(log, stats);
1720                         logFlushStats<SampleType>(log, stats);
1721                         logAllocStats<SampleType>(log, stats);
1722
1723                         log     << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
1724                                 << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
1725                                 << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
1726                                 << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
1727                                 << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
1728                                 << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
1729                                 << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
1730                                 << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
1731                                 << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
1732                                 << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
1733                                 << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
1734                                 << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
1735                 }
1736         }
1737
1738         // Contributions
1739         if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1740         {
1741                 const tcu::ScopedLogSection     section(log, "Contribution", "Contributions");
1742
1743                 logMapContribution(log, samples, resultStats);
1744                 logUnmapContribution(log, samples, resultStats);
1745                 logWriteContribution(log, samples, resultStats);
1746                 logFlushContribution(log, samples, resultStats);
1747                 logAllocContribution(log, samples, resultStats);
1748         }
1749
1750         // Print results
1751         {
1752                 const tcu::ScopedLogSection     section(log, "Results", "Results");
1753
1754                 const int       medianBufferSize                                        = (samples.front().bufferSize + samples.back().bufferSize) / 2;
1755                 const float     approximatedTransferTime                        = (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
1756                 const float     approximatedTransferTimeNoConstant      = (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
1757                 const float     sampleLinearity                                         = calculateSampleFitLinearity(samples);
1758                 const float     sampleTemporalStability                         = calculateSampleTemporalStability(samples);
1759
1760                 approximatedTransferRateNoConstant                              = (float)medianBufferSize / approximatedTransferTimeNoConstant;
1761                 approximatedTransferRate                                                = (float)medianBufferSize / approximatedTransferTime;
1762
1763                 log     << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1764                         << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1765                         << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1766                         << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1767                         << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1768                         << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1769                         << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1770                         << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1771                         << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
1772                         << tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
1773                         << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1774                         << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1775         }
1776
1777         // return approximated transfer rate
1778         {
1779                 UploadSampleAnalyzeResult result;
1780
1781                 result.transferRateMedian = resultStats.medianRate;
1782                 result.transferRateAtRange = approximatedTransferRate;
1783                 result.transferRateAtInfinity = approximatedTransferRateNoConstant;
1784
1785                 return result;
1786         }
1787 }
1788
1789 template <typename SampleType>
1790 static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
1791 {
1792         // Assume data is linear with some outliers, fit a line
1793         const LineParametersWithConfidence                                              theilSenFitting                                         = fitLineToSamples(samples);
1794         const typename SampleTypeTraits<SampleType>::StatsType  resultStats                                                     = calculateSampleStatistics(theilSenFitting, samples);
1795         float                                                                                                   approximatedProcessingRate;
1796         float                                                                                                   approximatedProcessingRateNoConstant;
1797
1798         // output raw samples
1799         {
1800                 const tcu::ScopedLogSection     section(log, "Samples", "Samples");
1801                 logSampleList(log, theilSenFitting, samples);
1802         }
1803
1804         // Contributions
1805         if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1806         {
1807                 const tcu::ScopedLogSection     section(log, "Contribution", "Contributions");
1808
1809                 logFirstRenderContribution(log, samples, resultStats);
1810                 logUploadContribution(log, samples, resultStats);
1811                 logRenderContribution(log, samples, resultStats);
1812                 logSecondRenderContribution(log, samples, resultStats);
1813                 logReadContribution(log, samples, resultStats);
1814                 logTotalContribution(log, samples, resultStats);
1815         }
1816
1817         // print results
1818         {
1819                 const tcu::ScopedLogSection     section(log, "Results", "Results");
1820
1821                 const int       medianDataSize                                          = (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
1822                 const float     approximatedRenderTime                          = (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
1823                 const float     approximatedRenderTimeNoConstant        = (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
1824                 const float     sampleLinearity                                         = calculateSampleFitLinearity(samples);
1825                 const float     sampleTemporalStability                         = calculateSampleTemporalStability(samples);
1826
1827                 approximatedProcessingRateNoConstant                    = (float)medianDataSize / approximatedRenderTimeNoConstant;
1828                 approximatedProcessingRate                                              = (float)medianDataSize / approximatedRenderTime;
1829
1830                 log     << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1831                         << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1832                         << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1833                         << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1834                         << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1835                         << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1836                         << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1837                         << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1838                         << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
1839                         << tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
1840                         << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1841                         << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1842         }
1843
1844         // return approximated render rate
1845         {
1846                 RenderSampleAnalyzeResult result;
1847
1848                 result.renderRateMedian         = resultStats.medianRate;
1849                 result.renderRateAtRange        = approximatedProcessingRate;
1850                 result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
1851
1852                 return result;
1853         }
1854         return RenderSampleAnalyzeResult();
1855 }
1856
1857 static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
1858 {
1859         de::Random      rnd                     (0xabc);
1860         const int       midPoint        = (numSamples+1) / 2;           // !< ceil(m_numSamples / 2)
1861
1862         DE_ASSERT((int)iterationOrder.size() == numSamples);
1863
1864         // Two "passes" over range, randomize order in both passes
1865         // This allows to us detect if iterations are not independent
1866         // (first run and later run samples differ significantly?)
1867
1868         for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
1869                 iterationOrder[sampleNdx] = sampleNdx * 2;
1870         for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
1871                 iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
1872
1873         for (int ndx = 0; ndx < midPoint; ++ndx)
1874                 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
1875         for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
1876                 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]);
1877 }
1878
1879 template <typename SampleType>
1880 class BasicBufferCase : public TestCase
1881 {
1882 public:
1883
1884         enum Flags
1885         {
1886                 FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
1887         };
1888                                                         BasicBufferCase         (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
1889                                                         ~BasicBufferCase        (void);
1890
1891         virtual void                    init                            (void);
1892         virtual void                    deinit                          (void);
1893
1894 protected:
1895         IterateResult                   iterate                         (void);
1896
1897         virtual bool                    runSample                       (int iteration, UploadSampleResult<SampleType>& sample) = 0;
1898         virtual void                    logAndSetTestResult     (const std::vector<UploadSampleResult<SampleType> >& results) = 0;
1899
1900         void                                    disableGLWarmup         (void);
1901         void                                    waitGLResults           (void);
1902
1903         enum
1904         {
1905                 DUMMY_RENDER_AREA_SIZE = 32
1906         };
1907
1908         glu::ShaderProgram*             m_dummyProgram;
1909         deInt32                                 m_dummyProgramPosLoc;
1910         deUint32                                m_bufferID;
1911
1912         const int                               m_numSamples;
1913         const int                               m_bufferSizeMin;
1914         const int                               m_bufferSizeMax;
1915         const bool                              m_allocateLargerBuffer;
1916
1917 private:
1918         int                                             m_iteration;
1919         std::vector<int>                m_iterationOrder;
1920         std::vector<UploadSampleResult<SampleType> > m_results;
1921
1922         bool                                    m_useGL;
1923         int                                             m_bufferRandomizerTimer;
1924 };
1925
1926 template <typename SampleType>
1927 BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
1928         : TestCase                                      (context, tcu::NODETYPE_PERFORMANCE, name, desc)
1929         , m_dummyProgram                        (DE_NULL)
1930         , m_dummyProgramPosLoc          (-1)
1931         , m_bufferID                            (0)
1932         , m_numSamples                          (numSamples)
1933         , m_bufferSizeMin                       (bufferSizeMin)
1934         , m_bufferSizeMax                       (bufferSizeMax)
1935         , m_allocateLargerBuffer        ((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
1936         , m_iteration                           (0)
1937         , m_iterationOrder                      (numSamples)
1938         , m_results                                     (numSamples)
1939         , m_useGL                                       (true)
1940         , m_bufferRandomizerTimer       (0)
1941 {
1942         // "randomize" iteration order. Deterministic, patternless
1943         generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
1944
1945         // choose buffer sizes
1946         for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
1947         {
1948                 const int rawBufferSize                 = (int)deFloatFloor((float)bufferSizeMin + (float)(bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / (float)m_numSamples));
1949                 const int bufferSize                    = deAlign32(rawBufferSize, 16);
1950                 const int allocatedBufferSize   = deAlign32((m_allocateLargerBuffer) ? ((int)((float)bufferSize * 1.5f)) : (bufferSize), 16);
1951
1952                 m_results[sampleNdx].bufferSize         = bufferSize;
1953                 m_results[sampleNdx].allocatedSize      = allocatedBufferSize;
1954                 m_results[sampleNdx].writtenSize        = -1;
1955         }
1956 }
1957
1958 template <typename SampleType>
1959 BasicBufferCase<SampleType>::~BasicBufferCase (void)
1960 {
1961         deinit();
1962 }
1963
1964 template <typename SampleType>
1965 void BasicBufferCase<SampleType>::init (void)
1966 {
1967         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1968
1969         if (!m_useGL)
1970                 return;
1971
1972         // \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE
1973
1974         // dummy shader
1975
1976         m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader));
1977         if (!m_dummyProgram->isOk())
1978         {
1979                 m_testCtx.getLog() << *m_dummyProgram;
1980                 throw tcu::TestError("failed to build shader program");
1981         }
1982
1983         m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");
1984         if (m_dummyProgramPosLoc == -1)
1985                 throw tcu::TestError("a_position location was -1");
1986 }
1987
1988 template <typename SampleType>
1989 void BasicBufferCase<SampleType>::deinit (void)
1990 {
1991         if (m_bufferID)
1992         {
1993                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
1994                 m_bufferID = 0;
1995         }
1996
1997         delete m_dummyProgram;
1998         m_dummyProgram = DE_NULL;
1999 }
2000
2001 template <typename SampleType>
2002 TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
2003 {
2004         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2005         static bool                             buffersWarmedUp = false;
2006
2007         static const deUint32   usages[] =
2008         {
2009                 GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
2010                 GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
2011                 GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2012         };
2013
2014         // Allocate some random sized buffers and remove them to
2015         // make sure the first samples too have some buffers removed
2016         // just before their allocation. This is only needed by the
2017         // the first test.
2018
2019         if (m_useGL && !buffersWarmedUp)
2020         {
2021                 const int                                       numRandomBuffers                                = 6;
2022                 const int                                       numRepeats                                              = 10;
2023                 const int                                       maxBufferSize                                   = 16777216;
2024                 const std::vector<deUint8>      zeroData                                                (maxBufferSize, 0x00);
2025                 de::Random                                      rnd                                                             (0x1234);
2026                 deUint32                                        bufferIDs[numRandomBuffers]             = {0};
2027
2028                 gl.useProgram(m_dummyProgram->getProgram());
2029                 gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2030                 gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2031
2032                 for (int ndx = 0; ndx < numRepeats; ++ndx)
2033                 {
2034                         // Create buffer and maybe draw from it
2035                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2036                         {
2037                                 const int               randomSize      = deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2038                                 const deUint32  usage           = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2039
2040                                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2041                                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2042                                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2043
2044                                 if (rnd.getBool())
2045                                 {
2046                                         gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2047                                         gl.drawArrays(GL_POINTS, 0, 1);
2048                                         gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2049                                 }
2050                         }
2051
2052                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2053                                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2054
2055                         waitGLResults();
2056                         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2057
2058                         m_testCtx.touchWatchdog();
2059                 }
2060
2061                 buffersWarmedUp = true;
2062                 return CONTINUE;
2063         }
2064         else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2065         {
2066                 // Do some random buffer operations to every now and then
2067                 // to make sure the previous test iterations won't affect
2068                 // following test runs.
2069
2070                 const int                                       numRandomBuffers                                = 3;
2071                 const int                                       maxBufferSize                                   = 16777216;
2072                 const std::vector<deUint8>      zeroData                                                (maxBufferSize, 0x00);
2073                 de::Random                                      rnd                                                             (0x1234 + 0xabc * m_bufferRandomizerTimer);
2074
2075                 // BufferData
2076                 {
2077                         deUint32 bufferIDs[numRandomBuffers] = {0};
2078
2079                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2080                         {
2081                                 const int               randomSize      = deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2082                                 const deUint32  usage           = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2083
2084                                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2085                                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2086                                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2087                         }
2088
2089                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2090                                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2091                 }
2092
2093                 GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2094
2095                 // Do some memory mappings
2096                 {
2097                         deUint32 bufferIDs[numRandomBuffers] = {0};
2098
2099                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2100                         {
2101                                 const int               randomSize      = deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2102                                 const deUint32  usage           = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2103                                 void*                   ptr;
2104
2105                                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2106                                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2107                                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2108
2109                                 gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2110                                 gl.drawArrays(GL_POINTS, 0, 1);
2111                                 gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2112
2113                                 if (rnd.getBool())
2114                                         waitGLResults();
2115
2116                                 ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2117                                 if (ptr)
2118                                 {
2119                                         medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2120                                         gl.unmapBuffer(GL_ARRAY_BUFFER);
2121                                 }
2122                         }
2123
2124                         for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2125                                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2126
2127                         waitGLResults();
2128                 }
2129
2130                 GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2131                 return CONTINUE;
2132         }
2133         else
2134         {
2135                 const int       currentIteration        = m_iteration;
2136                 const int       sampleNdx                       = m_iterationOrder[currentIteration];
2137                 const bool      sampleRunSuccessful     = runSample(currentIteration, m_results[sampleNdx]);
2138
2139                 GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2140
2141                 // Retry failed samples
2142                 if (!sampleRunSuccessful)
2143                         return CONTINUE;
2144
2145                 if (++m_iteration >= m_numSamples)
2146                 {
2147                         logAndSetTestResult(m_results);
2148                         return STOP;
2149                 }
2150                 else
2151                         return CONTINUE;
2152         }
2153 }
2154
2155 template <typename SampleType>
2156 void BasicBufferCase<SampleType>::disableGLWarmup (void)
2157 {
2158         m_useGL = false;
2159 }
2160
2161 template <typename SampleType>
2162 void BasicBufferCase<SampleType>::waitGLResults (void)
2163 {
2164         tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2165         glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
2166 }
2167
2168 template <typename SampleType>
2169 class BasicUploadCase : public BasicBufferCase<SampleType>
2170 {
2171 public:
2172         enum CaseType
2173         {
2174                 CASE_NO_BUFFERS = 0,
2175                 CASE_NEW_BUFFER,
2176                 CASE_UNSPECIFIED_BUFFER,
2177                 CASE_SPECIFIED_BUFFER,
2178                 CASE_USED_BUFFER,
2179                 CASE_USED_LARGER_BUFFER,
2180
2181                 CASE_LAST
2182         };
2183
2184         enum CaseFlags
2185         {
2186                 FLAG_DONT_LOG_BUFFER_INFO                               = 0x01,
2187                 FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT  = 0x02,
2188         };
2189
2190         enum ResultType
2191         {
2192                 RESULT_MEDIAN_TRANSFER_RATE = 0,
2193                 RESULT_ASYMPTOTIC_TRANSFER_RATE,
2194         };
2195
2196                                                 BasicUploadCase         (Context& context,
2197                                                                                          const char* name,
2198                                                                                          const char* desc,
2199                                                                                          int bufferSizeMin,
2200                                                                                          int bufferSizeMax,
2201                                                                                          int numSamples,
2202                                                                                          deUint32 bufferUsage,
2203                                                                                          CaseType caseType,
2204                                                                                          ResultType resultType,
2205                                                                                          int flags = 0);
2206
2207                                                 ~BasicUploadCase        (void);
2208
2209         virtual void            init                            (void);
2210         virtual void            deinit                          (void);
2211
2212 private:
2213         bool                            runSample                       (int iteration, UploadSampleResult<SampleType>& sample);
2214         void                            createBuffer            (int bufferSize, int iteration);
2215         void                            deleteBuffer            (int bufferSize);
2216         void                            useBuffer                       (int bufferSize);
2217
2218         virtual void            testBufferUpload        (UploadSampleResult<SampleType>& result, int writeSize) = 0;
2219         void                            logAndSetTestResult     (const std::vector<UploadSampleResult<SampleType> >& results);
2220
2221         deUint32                        m_dummyBufferID;
2222
2223 protected:
2224         const CaseType          m_caseType;
2225         const ResultType        m_resultType;
2226         const deUint32          m_bufferUsage;
2227         const bool                      m_logBufferInfo;
2228         const bool                      m_bufferUnspecifiedContent;
2229         std::vector<deUint8> m_zeroData;
2230
2231         using BasicBufferCase<SampleType>::m_testCtx;
2232         using BasicBufferCase<SampleType>::m_context;
2233
2234         using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
2235         using BasicBufferCase<SampleType>::m_dummyProgram;
2236         using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
2237         using BasicBufferCase<SampleType>::m_bufferID;
2238         using BasicBufferCase<SampleType>::m_numSamples;
2239         using BasicBufferCase<SampleType>::m_bufferSizeMin;
2240         using BasicBufferCase<SampleType>::m_bufferSizeMax;
2241         using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2242 };
2243
2244 template <typename SampleType>
2245 BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
2246         : BasicBufferCase<SampleType>   (context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2247         , m_dummyBufferID                               (0)
2248         , m_caseType                                    (caseType)
2249         , m_resultType                                  (resultType)
2250         , m_bufferUsage                                 (bufferUsage)
2251         , m_logBufferInfo                               ((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2252         , m_bufferUnspecifiedContent    ((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2253         , m_zeroData                                    ()
2254 {
2255         DE_ASSERT(m_caseType < CASE_LAST);
2256 }
2257
2258 template <typename SampleType>
2259 BasicUploadCase<SampleType>::~BasicUploadCase (void)
2260 {
2261         deinit();
2262 }
2263
2264 template <typename SampleType>
2265 void BasicUploadCase<SampleType>::init (void)
2266 {
2267         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2268
2269         BasicBufferCase<SampleType>::init();
2270
2271         // zero buffer as upload source
2272         m_zeroData.resize(m_bufferSizeMax, 0x00);
2273
2274         // dummy buffer
2275
2276         gl.genBuffers(1, &m_dummyBufferID);
2277         GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2278
2279         // log basic info
2280
2281         m_testCtx.getLog()
2282                 << tcu::TestLog::Message
2283                 << "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
2284                 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
2285                 << tcu::TestLog::EndMessage;
2286
2287         if (m_logBufferInfo)
2288         {
2289                 switch (m_caseType)
2290                 {
2291                         case CASE_NO_BUFFERS:
2292                                 break;
2293
2294                         case CASE_NEW_BUFFER:
2295                                 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage;
2296                                 break;
2297
2298                         case CASE_UNSPECIFIED_BUFFER:
2299                                 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage;
2300                                 break;
2301
2302                         case CASE_SPECIFIED_BUFFER:
2303                                 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage;
2304                                 break;
2305
2306                         case CASE_USED_BUFFER:
2307                                 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage;
2308                                 break;
2309
2310                         case CASE_USED_LARGER_BUFFER:
2311                                 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage;
2312                                 break;
2313
2314                         default:
2315                                 DE_ASSERT(false);
2316                                 break;
2317                 }
2318         }
2319
2320         if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2321                 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
2322         else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2323                 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
2324         else
2325                 DE_ASSERT(false);
2326 }
2327
2328 template <typename SampleType>
2329 void BasicUploadCase<SampleType>::deinit (void)
2330 {
2331         if (m_dummyBufferID)
2332         {
2333                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID);
2334                 m_dummyBufferID = 0;
2335         }
2336
2337         m_zeroData = std::vector<deUint8>();
2338
2339         BasicBufferCase<SampleType>::deinit();
2340 }
2341
2342 template <typename SampleType>
2343 bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
2344 {
2345         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2346         const int                               allocatedBufferSize     = sample.allocatedSize;
2347         const int                               bufferSize                      = sample.bufferSize;
2348
2349         if (m_caseType != CASE_NO_BUFFERS)
2350                 createBuffer(iteration, allocatedBufferSize);
2351
2352         // warmup CPU before the test to make sure the power management governor
2353         // keeps us in the "high performance" mode
2354         {
2355                 deYield();
2356                 tcu::warmupCPU();
2357                 deYield();
2358         }
2359
2360         testBufferUpload(sample, bufferSize);
2361         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2362
2363         if (m_caseType != CASE_NO_BUFFERS)
2364                 deleteBuffer(bufferSize);
2365
2366         return true;
2367 }
2368
2369 template <typename SampleType>
2370 void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
2371 {
2372         DE_ASSERT(!m_bufferID);
2373         DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2374
2375         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2376
2377         // create buffer
2378
2379         if (m_caseType == CASE_NO_BUFFERS)
2380                 return;
2381
2382         // create empty buffer
2383
2384         gl.genBuffers(1, &m_bufferID);
2385         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2386         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2387
2388         if (m_caseType == CASE_NEW_BUFFER)
2389         {
2390                 // upload something else first, this should reduce noise in samples
2391
2392                 de::Random                                      rng                             (0xbadc * iteration);
2393                 const int                                       sizeDelta               = rng.getInt(0, 2097140);
2394                 const int                                       dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated
2395                 const std::vector<deUint8>      dummyData               (dummyUploadSize, 0x20);
2396
2397                 gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
2398                 gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage);
2399
2400                 // make sure upload won't interfere with the test
2401                 useBuffer(dummyUploadSize);
2402
2403                 // don't kill the buffer so that the following upload cannot potentially reuse the buffer
2404
2405                 return;
2406         }
2407
2408         // specify it
2409
2410         if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2411                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2412         else
2413         {
2414                 const std::vector<deUint8> dummyData(bufferSize, 0x20);
2415                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2416         }
2417
2418         if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2419                 return;
2420
2421         // use it and make sure it is uploaded
2422
2423         useBuffer(bufferSize);
2424         DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2425 }
2426
2427 template <typename SampleType>
2428 void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
2429 {
2430         DE_ASSERT(m_bufferID);
2431         DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2432
2433         // render from the buffer to make sure it actually made it to the gpu. This is to
2434         // make sure that if the upload actually happens later or is happening right now in
2435         // the background, it will not interfere with further test runs
2436
2437         // if buffer contains unspecified content, sourcing data from it results in undefined
2438         // results, possibly including program termination. Specify all data to prevent such
2439         // case from happening
2440
2441         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2442
2443         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2444
2445         if (m_bufferUnspecifiedContent)
2446         {
2447                 const std::vector<deUint8> dummyData(bufferSize, 0x20);
2448                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2449
2450                 GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2451         }
2452
2453         useBuffer(bufferSize);
2454
2455         gl.deleteBuffers(1, &m_bufferID);
2456         m_bufferID = 0;
2457 }
2458
2459 template <typename SampleType>
2460 void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
2461 {
2462         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2463
2464         gl.useProgram(m_dummyProgram->getProgram());
2465
2466         gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2467         gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2468         gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2469
2470         // use whole buffer to make sure buffer is uploaded by drawing first and last
2471         DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2472         gl.drawArrays(GL_POINTS, 0, 1);
2473         gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2474
2475         BasicBufferCase<SampleType>::waitGLResults();
2476 }
2477
2478 template <typename SampleType>
2479 void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
2480 {
2481         const UploadSampleAnalyzeResult analysis        = analyzeSampleResults(m_testCtx.getLog(), results, true);
2482
2483         // with small buffers, report the median transfer rate of the samples
2484         // with large buffers, report the expected preformance of infinitely large buffers
2485         const float                                             rate            = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);
2486
2487         if (rate == std::numeric_limits<float>::infinity())
2488         {
2489                 // sample times are 1) invalid or 2) timer resolution too low
2490                 // report speed 0 bytes / s since real value cannot be determined
2491                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2492         }
2493         else
2494         {
2495                 // report transfer rate in MB / s
2496                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2497         }
2498 }
2499
2500 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2501 {
2502 public:
2503                                 ReferenceMemcpyCase             (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
2504                                 ~ReferenceMemcpyCase    (void);
2505
2506         void            init                                    (void);
2507         void            deinit                                  (void);
2508 private:
2509         void            testBufferUpload                (UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2510
2511         std::vector<deUint8> m_dstBuf;
2512 };
2513
2514 ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
2515         : BasicUploadCase<SingleOperationDuration>      (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2516         , m_dstBuf                                                                      ()
2517 {
2518         disableGLWarmup();
2519 }
2520
2521 ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
2522 {
2523 }
2524
2525 void ReferenceMemcpyCase::init (void)
2526 {
2527         // Describe what the test tries to do
2528         m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2529
2530         m_dstBuf.resize(m_bufferSizeMax, 0x00);
2531
2532         BasicUploadCase<SingleOperationDuration>::init();
2533 }
2534
2535 void ReferenceMemcpyCase::deinit (void)
2536 {
2537         m_dstBuf = std::vector<deUint8>();
2538         BasicUploadCase<SingleOperationDuration>::deinit();
2539 }
2540
2541 void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2542 {
2543         // write
2544         result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2545         result.duration.fitResponseDuration = result.duration.totalDuration;
2546
2547         result.writtenSize = bufferSize;
2548 }
2549
2550 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2551 {
2552 public:
2553                                 BufferDataUploadCase    (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
2554                                 ~BufferDataUploadCase   (void);
2555
2556         void            init                                    (void);
2557 private:
2558         void            testBufferUpload                (UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2559 };
2560
2561 BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
2562         : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE)
2563 {
2564 }
2565
2566 BufferDataUploadCase::~BufferDataUploadCase (void)
2567 {
2568 }
2569
2570 void BufferDataUploadCase::init (void)
2571 {
2572         // Describe what the test tries to do
2573         m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2574
2575         BasicUploadCase<SingleOperationDuration>::init();
2576 }
2577
2578 void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2579 {
2580         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2581
2582         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2583
2584         // upload
2585         {
2586                 deUint64 startTime;
2587                 deUint64 endTime;
2588
2589                 startTime = deGetMicroseconds();
2590                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2591                 endTime = deGetMicroseconds();
2592
2593                 result.duration.totalDuration = endTime - startTime;
2594                 result.duration.fitResponseDuration = result.duration.totalDuration;
2595                 result.writtenSize = bufferSize;
2596         }
2597 }
2598
2599 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2600 {
2601 public:
2602         enum Flags
2603         {
2604                 FLAG_FULL_UPLOAD                        = 0x01,
2605                 FLAG_PARTIAL_UPLOAD                     = 0x02,
2606                 FLAG_INVALIDATE_BEFORE_USE      = 0x04,
2607         };
2608
2609                                 BufferSubDataUploadCase         (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
2610                                 ~BufferSubDataUploadCase        (void);
2611
2612         void            init                                            (void);
2613 private:
2614         void            testBufferUpload                        (UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2615
2616         const bool      m_fullUpload;
2617         const bool      m_invalidateBeforeUse;
2618 };
2619
2620 BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
2621         : BasicUploadCase<SingleOperationDuration>      (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE)
2622         , m_fullUpload                                                          ((flags & FLAG_FULL_UPLOAD) != 0)
2623         , m_invalidateBeforeUse                                         ((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
2624 {
2625         DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
2626         DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
2627 }
2628
2629 BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
2630 {
2631 }
2632
2633 void BufferSubDataUploadCase::init (void)
2634 {
2635         // Describe what the test tries to do
2636         m_testCtx.getLog()
2637                 << tcu::TestLog::Message
2638                 << "Testing glBufferSubData() function call performance. "
2639                 << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. "))
2640                 << ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n"
2641                 << tcu::TestLog::EndMessage;
2642
2643         BasicUploadCase<SingleOperationDuration>::init();
2644 }
2645
2646 void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2647 {
2648         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2649
2650         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2651
2652         // "invalidate", upload null
2653         if (m_invalidateBeforeUse)
2654                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2655
2656         // upload
2657         {
2658                 deUint64 startTime;
2659                 deUint64 endTime;
2660
2661                 startTime = deGetMicroseconds();
2662
2663                 if (m_fullUpload)
2664                         gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
2665                 else
2666                 {
2667                         // upload to buffer center
2668                         gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
2669                 }
2670
2671                 endTime = deGetMicroseconds();
2672
2673                 result.duration.totalDuration = endTime - startTime;
2674                 result.duration.fitResponseDuration = result.duration.totalDuration;
2675
2676                 if (m_fullUpload)
2677                         result.writtenSize = bufferSize;
2678                 else
2679                         result.writtenSize = bufferSize / 2;
2680         }
2681 }
2682
2683 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
2684 {
2685 public:
2686         enum Flags
2687         {
2688                 FLAG_PARTIAL                                            = 0x01,
2689                 FLAG_MANUAL_INVALIDATION                        = 0x02,
2690                 FLAG_USE_UNUSED_UNSPECIFIED_BUFFER      = 0x04,
2691                 FLAG_USE_UNUSED_SPECIFIED_BUFFER        = 0x08,
2692         };
2693
2694                                         MapBufferRangeCase                      (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2695                                         ~MapBufferRangeCase                     (void);
2696
2697         void                    init                                            (void);
2698 private:
2699         static CaseType getBaseCaseType                         (int caseFlags);
2700         static int              getBaseFlags                            (deUint32 mapFlags, int caseFlags);
2701
2702         void                    testBufferUpload                        (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2703         void                    attemptBufferMap                        (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2704
2705         const bool              m_manualInvalidation;
2706         const bool              m_fullUpload;
2707         const bool              m_useUnusedUnspecifiedBuffer;
2708         const bool              m_useUnusedSpecifiedBuffer;
2709         const deUint32  m_mapFlags;
2710         int                             m_unmapFailures;
2711 };
2712
2713 MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2714         : BasicUploadCase<MapBufferRangeDuration>       (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2715         , m_manualInvalidation                                          ((caseFlags&FLAG_MANUAL_INVALIDATION) != 0)
2716         , m_fullUpload                                                          ((caseFlags&FLAG_PARTIAL) == 0)
2717         , m_useUnusedUnspecifiedBuffer                          ((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2718         , m_useUnusedSpecifiedBuffer                            ((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2719         , m_mapFlags                                                            (mapFlags)
2720         , m_unmapFailures                                                       (0)
2721 {
2722         DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
2723         DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
2724 }
2725
2726 MapBufferRangeCase::~MapBufferRangeCase (void)
2727 {
2728 }
2729
2730 void MapBufferRangeCase::init (void)
2731 {
2732         // Describe what the test tries to do
2733         m_testCtx.getLog()
2734                 << tcu::TestLog::Message
2735                 << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
2736                 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2737                 << ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2738                 << ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2739                 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2740                 << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
2741                 << "Map bits:\n"
2742                 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2743                 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2744                 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2745                 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2746                 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2747                 << tcu::TestLog::EndMessage;
2748
2749         BasicUploadCase<MapBufferRangeDuration>::init();
2750 }
2751
2752 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
2753 {
2754         if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2755                 return CASE_USED_BUFFER;
2756         else
2757                 return CASE_NEW_BUFFER;
2758 }
2759
2760 int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2761 {
2762         int flags = FLAG_DONT_LOG_BUFFER_INFO;
2763
2764         // If buffer contains unspecified data when it is sourced (i.e drawn)
2765         // results are undefined, and system errors may occur. Signal parent
2766         // class to take this into account
2767         if (caseFlags & FLAG_PARTIAL)
2768         {
2769                 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0                      ||
2770                         (caseFlags & FLAG_MANUAL_INVALIDATION) != 0                             ||
2771                         (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2772                 {
2773                         flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2774                 }
2775         }
2776
2777         return flags;
2778 }
2779
2780 void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2781 {
2782         const int unmapFailureThreshold = 4;
2783
2784         for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
2785         {
2786                 try
2787                 {
2788                         attemptBufferMap(result, bufferSize);
2789                         return;
2790                 }
2791                 catch (UnmapFailureError&)
2792                 {
2793                 }
2794         }
2795
2796         throw tcu::TestError("Unmapping failures exceeded limit");
2797 }
2798
2799 void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2800 {
2801         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2802
2803         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2804
2805         if (m_fullUpload)
2806                 result.writtenSize = bufferSize;
2807         else
2808                 result.writtenSize = bufferSize / 2;
2809
2810         // Create unused buffer
2811
2812         if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
2813         {
2814                 deUint64 startTime;
2815                 deUint64 endTime;
2816
2817                 // "invalidate" or allocate, upload null
2818                 startTime = deGetMicroseconds();
2819                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2820                 endTime = deGetMicroseconds();
2821
2822                 result.duration.allocDuration = endTime - startTime;
2823         }
2824         else if (m_useUnusedSpecifiedBuffer)
2825         {
2826                 deUint64 startTime;
2827                 deUint64 endTime;
2828
2829                 // Specify buffer contents
2830                 startTime = deGetMicroseconds();
2831                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2832                 endTime = deGetMicroseconds();
2833
2834                 result.duration.allocDuration = endTime - startTime;
2835         }
2836         else
2837         {
2838                 // No alloc, no time
2839                 result.duration.allocDuration = 0;
2840         }
2841
2842         // upload
2843         {
2844                 void* mapPtr;
2845
2846                 // Map
2847                 {
2848                         deUint64 startTime;
2849                         deUint64 endTime;
2850
2851                         startTime = deGetMicroseconds();
2852                         if (m_fullUpload)
2853                                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
2854                         else
2855                         {
2856                                 // upload to buffer center
2857                                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
2858                         }
2859                         endTime = deGetMicroseconds();
2860
2861                         if (!mapPtr)
2862                                 throw tcu::Exception("MapBufferRange returned NULL");
2863
2864                         result.duration.mapDuration = endTime - startTime;
2865                 }
2866
2867                 // Write
2868                 {
2869                         result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
2870                 }
2871
2872                 // Unmap
2873                 {
2874                         deUint64                startTime;
2875                         deUint64                endTime;
2876                         glw::GLboolean  unmapSuccessful;
2877
2878                         startTime = deGetMicroseconds();
2879                         unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
2880                         endTime = deGetMicroseconds();
2881
2882                         // if unmapping fails, just try again later
2883                         if (!unmapSuccessful)
2884                                 throw UnmapFailureError();
2885
2886                         result.duration.unmapDuration = endTime - startTime;
2887                 }
2888
2889                 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
2890                 result.duration.fitResponseDuration = result.duration.totalDuration;
2891         }
2892 }
2893
2894 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
2895 {
2896 public:
2897         enum Flags
2898         {
2899                 FLAG_PARTIAL                                            = 0x01,
2900                 FLAG_FLUSH_IN_PARTS                                     = 0x02,
2901                 FLAG_USE_UNUSED_UNSPECIFIED_BUFFER      = 0x04,
2902                 FLAG_USE_UNUSED_SPECIFIED_BUFFER        = 0x08,
2903                 FLAG_FLUSH_PARTIAL                                      = 0x10,
2904         };
2905
2906                                         MapBufferRangeFlushCase         (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2907                                         ~MapBufferRangeFlushCase        (void);
2908
2909         void                    init                                            (void);
2910 private:
2911         static CaseType getBaseCaseType                         (int caseFlags);
2912         static int              getBaseFlags                            (deUint32 mapFlags, int caseFlags);
2913
2914         void                    testBufferUpload                        (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2915         void                    attemptBufferMap                        (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2916
2917         const bool              m_fullUpload;
2918         const bool              m_flushInParts;
2919         const bool              m_flushPartial;
2920         const bool              m_useUnusedUnspecifiedBuffer;
2921         const bool              m_useUnusedSpecifiedBuffer;
2922         const deUint32  m_mapFlags;
2923         int                             m_unmapFailures;
2924 };
2925
2926 MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2927         : BasicUploadCase<MapBufferRangeFlushDuration>  (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2928         , m_fullUpload                                                                  ((caseFlags&FLAG_PARTIAL) == 0)
2929         , m_flushInParts                                                                ((caseFlags&FLAG_FLUSH_IN_PARTS) != 0)
2930         , m_flushPartial                                                                ((caseFlags&FLAG_FLUSH_PARTIAL) != 0)
2931         , m_useUnusedUnspecifiedBuffer                                  ((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2932         , m_useUnusedSpecifiedBuffer                                    ((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2933         , m_mapFlags                                                                    (mapFlags)
2934         , m_unmapFailures                                                               (0)
2935 {
2936         DE_ASSERT(!(m_flushPartial && m_flushInParts));
2937         DE_ASSERT(!(m_flushPartial && !m_fullUpload));
2938 }
2939
2940 MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
2941 {
2942 }
2943
2944 void MapBufferRangeFlushCase::init (void)
2945 {
2946         // Describe what the test tries to do
2947         m_testCtx.getLog()
2948                 << tcu::TestLog::Message
2949                 << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
2950                 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2951                 << ((m_flushInParts) ?
2952                         ("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
2953                         (m_flushPartial) ?
2954                                 ("Half of the buffer range is flushed.") :
2955                                 ("The whole mapped range is flushed in one flush call.")) << "\n"
2956                 << ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2957                 << ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2958                 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2959                 << "Map bits:\n"
2960                 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2961                 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2962                 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2963                 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2964                 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2965                 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
2966                 << tcu::TestLog::EndMessage;
2967
2968         BasicUploadCase<MapBufferRangeFlushDuration>::init();
2969 }
2970
2971 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
2972 {
2973         if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2974                 return CASE_USED_BUFFER;
2975         else
2976                 return CASE_NEW_BUFFER;
2977 }
2978
2979 int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2980 {
2981         int flags = FLAG_DONT_LOG_BUFFER_INFO;
2982
2983         // If buffer contains unspecified data when it is sourced (i.e drawn)
2984         // results are undefined, and system errors may occur. Signal parent
2985         // class to take this into account
2986         if (caseFlags & FLAG_PARTIAL)
2987         {
2988                 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0                      ||
2989                         (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0   ||
2990                         (caseFlags & FLAG_FLUSH_PARTIAL) != 0)
2991                 {
2992                         flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2993                 }
2994         }
2995
2996         return flags;
2997 }
2998
2999 void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3000 {
3001         const int unmapFailureThreshold = 4;
3002
3003         for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3004         {
3005                 try
3006                 {
3007                         attemptBufferMap(result, bufferSize);
3008                         return;
3009                 }
3010                 catch (UnmapFailureError&)
3011                 {
3012                 }
3013         }
3014
3015         throw tcu::TestError("Unmapping failures exceeded limit");
3016 }
3017
3018 void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3019 {
3020         const glw::Functions&   gl                      = m_context.getRenderContext().getFunctions();
3021         const int                               mappedSize      = (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3022
3023         if (m_fullUpload && !m_flushPartial)
3024                 result.writtenSize = bufferSize;
3025         else
3026                 result.writtenSize = bufferSize / 2;
3027
3028         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3029
3030         // Create unused buffer
3031
3032         if (m_useUnusedUnspecifiedBuffer)
3033         {
3034                 deUint64 startTime;
3035                 deUint64 endTime;
3036
3037                 // Don't specify contents
3038                 startTime = deGetMicroseconds();
3039                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3040                 endTime = deGetMicroseconds();
3041
3042                 result.duration.allocDuration = endTime - startTime;
3043         }
3044         else if (m_useUnusedSpecifiedBuffer)
3045         {
3046                 deUint64 startTime;
3047                 deUint64 endTime;
3048
3049                 // Specify buffer contents
3050                 startTime = deGetMicroseconds();
3051                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3052                 endTime = deGetMicroseconds();
3053
3054                 result.duration.allocDuration = endTime - startTime;
3055         }
3056         else
3057         {
3058                 // No alloc, no time
3059                 result.duration.allocDuration = 0;
3060         }
3061
3062         // upload
3063         {
3064                 void* mapPtr;
3065
3066                 // Map
3067                 {
3068                         deUint64 startTime;
3069                         deUint64 endTime;
3070
3071                         startTime = deGetMicroseconds();
3072                         if (m_fullUpload)
3073                                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3074                         else
3075                         {
3076                                 // upload to buffer center
3077                                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3078                         }
3079                         endTime = deGetMicroseconds();
3080
3081                         if (!mapPtr)
3082                                 throw tcu::Exception("MapBufferRange returned NULL");
3083
3084                         result.duration.mapDuration = endTime - startTime;
3085                 }
3086
3087                 // Write
3088                 {
3089                         if (!m_flushPartial)
3090                                 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3091                         else
3092                                 result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3093                 }
3094
3095                 // Flush
3096                 {
3097                         deUint64        startTime;
3098                         deUint64        endTime;
3099
3100                         startTime = deGetMicroseconds();
3101
3102                         if (m_flushPartial)
3103                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
3104                         else if (!m_flushInParts)
3105                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3106                         else
3107                         {
3108                                 const int p1 = 0;
3109                                 const int p2 = mappedSize / 3;
3110                                 const int p3 = mappedSize / 2;
3111                                 const int p4 = mappedSize * 2 / 4;
3112                                 const int p5 = mappedSize;
3113
3114                                 // flush in mixed order
3115                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,  p3-p2);
3116                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,  p2-p1);
3117                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,  p5-p4);
3118                                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,  p4-p3);
3119                         }
3120
3121                         endTime = deGetMicroseconds();
3122
3123                         result.duration.flushDuration = endTime - startTime;
3124                 }
3125
3126                 // Unmap
3127                 {
3128                         deUint64                startTime;
3129                         deUint64                endTime;
3130                         glw::GLboolean  unmapSuccessful;
3131
3132                         startTime = deGetMicroseconds();
3133                         unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3134                         endTime = deGetMicroseconds();
3135
3136                         // if unmapping fails, just try again later
3137                         if (!unmapSuccessful)
3138                                 throw UnmapFailureError();
3139
3140                         result.duration.unmapDuration = endTime - startTime;
3141                 }
3142
3143                 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
3144                 result.duration.fitResponseDuration = result.duration.totalDuration;
3145         }
3146 }
3147
3148 template <typename SampleType>
3149 class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3150 {
3151 public:
3152                                                 ModifyAfterBasicCase    (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
3153                                                 ~ModifyAfterBasicCase   (void);
3154
3155         void                            init                                    (void);
3156         void                            deinit                                  (void);
3157
3158 protected:
3159         void                            drawBufferRange                 (int begin, int end);
3160
3161 private:
3162         enum
3163         {
3164                 NUM_SAMPLES = 20,
3165         };
3166
3167
3168         bool                            runSample                               (int iteration, UploadSampleResult<SampleType>& sample);
3169         bool                            prepareAndRunTest               (int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
3170         void                            logAndSetTestResult             (const std::vector<UploadSampleResult<SampleType> >& results);
3171
3172         virtual void            testWithBufferSize              (UploadSampleResult<SampleType>& result, int bufferSize) = 0;
3173
3174         int                                     m_unmappingErrors;
3175
3176 protected:
3177         const bool                      m_bufferUnspecifiedAfterTest;
3178         const deUint32          m_bufferUsage;
3179         std::vector<deUint8> m_zeroData;
3180
3181         using BasicBufferCase<SampleType>::m_testCtx;
3182         using BasicBufferCase<SampleType>::m_context;
3183
3184         using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
3185         using BasicBufferCase<SampleType>::m_dummyProgram;
3186         using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
3187         using BasicBufferCase<SampleType>::m_bufferID;
3188         using BasicBufferCase<SampleType>::m_numSamples;
3189         using BasicBufferCase<SampleType>::m_bufferSizeMin;
3190         using BasicBufferCase<SampleType>::m_bufferSizeMax;
3191         using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3192 };
3193
3194 template <typename SampleType>
3195 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
3196         : BasicBufferCase<SampleType>   (context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3197         , m_unmappingErrors                             (0)
3198         , m_bufferUnspecifiedAfterTest  (bufferUnspecifiedAfterTest)
3199         , m_bufferUsage                                 (usage)
3200         , m_zeroData                                    ()
3201 {
3202 }
3203
3204 template <typename SampleType>
3205 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
3206 {
3207         BasicBufferCase<SampleType>::deinit();
3208 }
3209
3210 template <typename SampleType>
3211 void ModifyAfterBasicCase<SampleType>::init (void)
3212 {
3213         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3214
3215         // init parent
3216
3217         BasicBufferCase<SampleType>::init();
3218
3219         // upload source
3220         m_zeroData.resize(m_bufferSizeMax, 0x00);
3221
3222         // log basic info
3223
3224         m_testCtx.getLog()
3225                 << tcu::TestLog::Message
3226                 << "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
3227                 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
3228                 << tcu::TestLog::EndMessage;
3229
3230         // log which transfer rate is the test result and buffer info
3231
3232         m_testCtx.getLog()
3233                 << tcu::TestLog::Message
3234                 << "Test result is the median transfer rate of the test samples.\n"
3235                 << "Buffer usage = " << glu::getUsageName(m_bufferUsage)
3236                 << tcu::TestLog::EndMessage;
3237
3238         // Set state for drawing so that we don't have to change these during the iteration
3239         {
3240                 gl.useProgram(m_dummyProgram->getProgram());
3241                 gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
3242                 gl.enableVertexAttribArray(m_dummyProgramPosLoc);
3243         }
3244 }
3245
3246 template <typename SampleType>
3247 void ModifyAfterBasicCase<SampleType>::deinit (void)
3248 {
3249         m_zeroData = std::vector<deUint8>();
3250
3251         BasicBufferCase<SampleType>::deinit();
3252 }
3253
3254 template <typename SampleType>
3255 void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
3256 {
3257         DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3258         DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3259
3260         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3261
3262         // use given range
3263         gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3264         gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3265 }
3266
3267 template <typename SampleType>
3268 bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
3269 {
3270         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
3271         const int                               bufferSize                      = sample.bufferSize;
3272         bool                                    testOk;
3273
3274         testOk = prepareAndRunTest(iteration, sample, bufferSize);
3275         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3276
3277         if (!testOk)
3278         {
3279                 const int unmapFailureThreshold = 4;
3280
3281                 // only unmapping error can cause iteration failure
3282                 if (++m_unmappingErrors >= unmapFailureThreshold)
3283                         throw tcu::TestError("Too many unmapping errors, cannot continue.");
3284
3285                 // just try again
3286                 return false;
3287         }
3288
3289         return true;
3290 }
3291
3292 template <typename SampleType>
3293 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
3294 {
3295         DE_UNREF(iteration);
3296
3297         DE_ASSERT(!m_bufferID);
3298         DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4
3299
3300         const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
3301         bool                                            testRunOk               = true;
3302         bool                                            unmappingFailed = false;
3303
3304         // Upload initial buffer to the GPU...
3305         gl.genBuffers(1, &m_bufferID);
3306         gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3307         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3308
3309         // ...use it...
3310         gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3311         drawBufferRange(0, bufferSize);
3312
3313         // ..and make sure it is uploaded
3314         BasicBufferCase<SampleType>::waitGLResults();
3315
3316         // warmup CPU before the test to make sure the power management governor
3317         // keeps us in the "high performance" mode
3318         {
3319                 deYield();
3320                 tcu::warmupCPU();
3321                 deYield();
3322         }
3323
3324         // test
3325         try
3326         {
3327                 // buffer is uploaded to the GPU. Draw from it.
3328                 drawBufferRange(0, bufferSize);
3329
3330                 // and test upload
3331                 testWithBufferSize(result, bufferSize);
3332         }
3333         catch (UnmapFailureError&)
3334         {
3335                 testRunOk = false;
3336                 unmappingFailed = true;
3337         }
3338
3339         // clean up: make sure buffer is not in upload queue and delete it
3340
3341         // sourcing unspecified data causes undefined results, possibly program termination
3342         if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3343                 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3344
3345         drawBufferRange(0, bufferSize);
3346         BasicBufferCase<SampleType>::waitGLResults();
3347
3348         gl.deleteBuffers(1, &m_bufferID);
3349         m_bufferID = 0;
3350
3351         return testRunOk;
3352 }
3353
3354 template <typename SampleType>
3355 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
3356 {
3357         const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3358
3359         // Return median transfer rate of the samples
3360
3361         if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3362         {
3363                 // sample times are 1) invalid or 2) timer resolution too low
3364                 // report speed 0 bytes / s since real value cannot be determined
3365                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3366         }
3367         else
3368         {
3369                 // report transfer rate in MB / s
3370                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3371         }
3372 }
3373
3374 class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3375 {
3376 public:
3377
3378         enum CaseFlags
3379         {
3380                 FLAG_RESPECIFY_SIZE             = 0x1,
3381                 FLAG_UPLOAD_REPEATED    = 0x2,
3382         };
3383
3384                                         ModifyAfterWithBufferDataCase   (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3385                                         ~ModifyAfterWithBufferDataCase  (void);
3386
3387         void                    init                                                    (void);
3388         void                    deinit                                                  (void);
3389 private:
3390         void                    testWithBufferSize                              (UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3391
3392         enum
3393         {
3394                 NUM_REPEATS = 2
3395         };
3396
3397         const bool              m_respecifySize;
3398         const bool              m_repeatedUpload;
3399         const float             m_sizeDifferenceFactor;
3400 };
3401
3402 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3403         : ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3404         , m_respecifySize                                                               ((flags & FLAG_RESPECIFY_SIZE) != 0)
3405         , m_repeatedUpload                                                              ((flags & FLAG_UPLOAD_REPEATED) != 0)
3406         , m_sizeDifferenceFactor                                                (1.3f)
3407 {
3408         DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3409 }
3410
3411 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
3412 {
3413         deinit();
3414 }
3415
3416 void ModifyAfterWithBufferDataCase::init (void)
3417 {
3418         // Log the purpose of the test
3419
3420         if (m_repeatedUpload)
3421                 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3422         else
3423                 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3424
3425         m_testCtx.getLog()
3426                 << tcu::TestLog::Message
3427                 << ((m_respecifySize) ?
3428                         ("Buffer size is increased and contents are modified with BufferData().\n") :
3429                         ("Buffer contents are modified with BufferData().\n"))
3430                 << tcu::TestLog::EndMessage;
3431
3432         // init parent
3433         ModifyAfterBasicCase<SingleOperationDuration>::init();
3434
3435         // make sure our zeroBuffer is large enough
3436         if (m_respecifySize)
3437         {
3438                 const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
3439                 m_zeroData.resize(largerBufferSize, 0x00);
3440         }
3441 }
3442
3443 void ModifyAfterWithBufferDataCase::deinit (void)
3444 {
3445         ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3446 }
3447
3448 void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3449 {
3450         // always draw the same amount to make compares between cases sensible
3451         const int                                       drawStart                       = deAlign32(bufferSize / 4, 4*4);
3452         const int                                       drawEnd                         = deAlign32(bufferSize * 3 / 4, 4*4);
3453
3454         const glw::Functions&           gl                                      = m_context.getRenderContext().getFunctions();
3455         const int                                       largerBufferSize        = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4*4);
3456         const int                                       newBufferSize           = (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3457         deUint64                                        startTime;
3458         deUint64                                        endTime;
3459
3460         // repeat upload-draw
3461         if (m_repeatedUpload)
3462         {
3463                 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3464                 {
3465                         gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3466                         drawBufferRange(drawStart, drawEnd);
3467                 }
3468         }
3469
3470         // test upload
3471         startTime = deGetMicroseconds();
3472         gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3473         endTime = deGetMicroseconds();
3474
3475         result.duration.totalDuration = endTime - startTime;
3476         result.duration.fitResponseDuration = result.duration.totalDuration;
3477         result.writtenSize = newBufferSize;
3478 }
3479
3480 class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3481 {
3482 public:
3483
3484         enum CaseFlags
3485         {
3486                 FLAG_PARTIAL                    = 0x1,
3487                 FLAG_UPLOAD_REPEATED    = 0x2,
3488         };
3489
3490                                         ModifyAfterWithBufferSubDataCase        (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3491                                         ~ModifyAfterWithBufferSubDataCase       (void);
3492
3493         void                    init                                                            (void);
3494         void                    deinit                                                          (void);
3495 private:
3496         void                    testWithBufferSize                                      (UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3497
3498         enum
3499         {
3500                 NUM_REPEATS = 2
3501         };
3502
3503         const bool              m_partialUpload;
3504         const bool              m_repeatedUpload;
3505 };
3506
3507 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3508         : ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3509         , m_partialUpload                                                               ((flags & FLAG_PARTIAL) != 0)
3510         , m_repeatedUpload                                                              ((flags & FLAG_UPLOAD_REPEATED) != 0)
3511 {
3512 }
3513
3514 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
3515 {
3516         deinit();
3517 }
3518
3519 void ModifyAfterWithBufferSubDataCase::init (void)
3520 {
3521         // Log the purpose of the test
3522
3523         if (m_repeatedUpload)
3524                 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3525         else
3526                 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3527
3528         m_testCtx.getLog()
3529                 << tcu::TestLog::Message
3530                 << ((m_partialUpload) ?
3531                         ("Half of the buffer contents are modified.\n") :
3532                         ("Buffer contents are fully respecified.\n"))
3533                 << tcu::TestLog::EndMessage;
3534
3535         ModifyAfterBasicCase<SingleOperationDuration>::init();
3536 }
3537
3538 void ModifyAfterWithBufferSubDataCase::deinit (void)
3539 {
3540         ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3541 }
3542
3543 void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3544 {
3545         // always draw the same amount to make compares between cases sensible
3546         const int                                       drawStart                       = deAlign32(bufferSize / 4, 4*4);
3547         const int                                       drawEnd                         = deAlign32(bufferSize * 3 / 4, 4*4);
3548
3549         const glw::Functions&           gl                                      = m_context.getRenderContext().getFunctions();
3550         const int                                       subdataOffset           = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3551         const int                                       subdataSize                     = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3552         deUint64                                        startTime;
3553         deUint64                                        endTime;
3554
3555         // make upload-draw stream
3556         if (m_repeatedUpload)
3557         {
3558                 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3559                 {
3560                         gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3561                         drawBufferRange(drawStart, drawEnd);
3562                 }
3563         }
3564
3565         // test upload
3566         startTime = deGetMicroseconds();
3567         gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3568         endTime = deGetMicroseconds();
3569
3570         result.duration.totalDuration = endTime - startTime;
3571         result.duration.fitResponseDuration = result.duration.totalDuration;
3572         result.writtenSize = subdataSize;
3573 }
3574
3575 class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
3576 {
3577 public:
3578
3579         enum CaseFlags
3580         {
3581                 FLAG_PARTIAL = 0x1,
3582         };
3583
3584                                         ModifyAfterWithMapBufferRangeCase       (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3585                                         ~ModifyAfterWithMapBufferRangeCase      (void);
3586
3587         void                    init                                                            (void);
3588         void                    deinit                                                          (void);
3589 private:
3590         static bool             isBufferUnspecifiedAfterUpload          (int flags, deUint32 mapFlags);
3591         void                    testWithBufferSize                                      (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);
3592
3593         const bool              m_partialUpload;
3594         const deUint32  m_mapFlags;
3595 };
3596
3597 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3598         : ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>   (context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3599         , m_partialUpload                                                                               ((flags & FLAG_PARTIAL) != 0)
3600         , m_mapFlags                                                                                    (glMapFlags)
3601 {
3602 }
3603
3604 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
3605 {
3606         deinit();
3607 }
3608
3609 void ModifyAfterWithMapBufferRangeCase::init (void)
3610 {
3611         // Log the purpose of the test
3612
3613         m_testCtx.getLog()
3614                 << tcu::TestLog::Message
3615                 << "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3616                 << ((m_partialUpload) ?
3617                         ("Half of the buffer is mapped.\n") :
3618                         ("Whole buffer is mapped.\n"))
3619                 << "Map bits:\n"
3620                 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3621                 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3622                 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3623                 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3624                 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3625                 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3626                 << tcu::TestLog::EndMessage;
3627
3628         ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
3629 }
3630
3631 void ModifyAfterWithMapBufferRangeCase::deinit (void)
3632 {
3633         ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
3634 }
3635
3636 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3637 {
3638         if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3639                 return true;
3640
3641         return false;
3642 }
3643
3644 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
3645 {
3646         const glw::Functions&           gl                                      = m_context.getRenderContext().getFunctions();
3647         const int                                       subdataOffset           = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3648         const int                                       subdataSize                     = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3649         void*                                           mapPtr;
3650
3651         // map
3652         {
3653                 deUint64 startTime;
3654                 deUint64 endTime;
3655
3656                 startTime = deGetMicroseconds();
3657                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3658                 endTime = deGetMicroseconds();
3659
3660                 if (!mapPtr)
3661                         throw tcu::TestError("mapBufferRange returned null");
3662
3663                 result.duration.mapDuration = endTime - startTime;
3664         }
3665
3666         // write
3667         {
3668                 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3669         }
3670
3671         // unmap
3672         {
3673                 deUint64                startTime;
3674                 deUint64                endTime;
3675                 glw::GLboolean  unmapSucceeded;
3676
3677                 startTime = deGetMicroseconds();
3678                 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3679                 endTime = deGetMicroseconds();
3680
3681                 if (unmapSucceeded != GL_TRUE)
3682                         throw UnmapFailureError();
3683
3684                 result.duration.unmapDuration = endTime - startTime;
3685         }
3686
3687         result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
3688         result.duration.fitResponseDuration = result.duration.totalDuration;
3689         result.writtenSize = subdataSize;
3690 }
3691
3692 class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
3693 {
3694 public:
3695
3696         enum CaseFlags
3697         {
3698                 FLAG_PARTIAL = 0x1,
3699         };
3700
3701                                         ModifyAfterWithMapBufferFlushCase       (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3702                                         ~ModifyAfterWithMapBufferFlushCase      (void);
3703
3704         void                    init                                                            (void);
3705         void                    deinit                                                          (void);
3706 private:
3707         static bool             isBufferUnspecifiedAfterUpload          (int flags, deUint32 mapFlags);
3708         void                    testWithBufferSize                                      (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);
3709
3710         const bool              m_partialUpload;
3711         const deUint32  m_mapFlags;
3712 };
3713
3714 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3715         : ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>      (context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3716         , m_partialUpload                                                                                       ((flags & FLAG_PARTIAL) != 0)
3717         , m_mapFlags                                                                                            (glMapFlags)
3718 {
3719 }
3720
3721 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
3722 {
3723         deinit();
3724 }
3725
3726 void ModifyAfterWithMapBufferFlushCase::init (void)
3727 {
3728         // Log the purpose of the test
3729
3730         m_testCtx.getLog()
3731                 << tcu::TestLog::Message
3732                 << "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3733                 << ((m_partialUpload) ?
3734                         ("Half of the buffer is mapped.\n") :
3735                         ("Whole buffer is mapped.\n"))
3736                 << "Map bits:\n"
3737                 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3738                 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3739                 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3740                 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3741                 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3742                 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3743                 << tcu::TestLog::EndMessage;
3744
3745         ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
3746 }
3747
3748 void ModifyAfterWithMapBufferFlushCase::deinit (void)
3749 {
3750         ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
3751 }
3752
3753 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3754 {
3755         if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3756                 return true;
3757
3758         return false;
3759 }
3760
3761 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
3762 {
3763         const glw::Functions&           gl                                      = m_context.getRenderContext().getFunctions();
3764         const int                                       subdataOffset           = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3765         const int                                       subdataSize                     = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3766         void*                                           mapPtr;
3767
3768         // map
3769         {
3770                 deUint64 startTime;
3771                 deUint64 endTime;
3772
3773                 startTime = deGetMicroseconds();
3774                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3775                 endTime = deGetMicroseconds();
3776
3777                 if (!mapPtr)
3778                         throw tcu::TestError("mapBufferRange returned null");
3779
3780                 result.duration.mapDuration = endTime - startTime;
3781         }
3782
3783         // write
3784         {
3785                 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3786         }
3787
3788         // flush
3789         {
3790                 deUint64 startTime;
3791                 deUint64 endTime;
3792
3793                 startTime = deGetMicroseconds();
3794                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
3795                 endTime = deGetMicroseconds();
3796
3797                 result.duration.flushDuration = endTime - startTime;
3798         }
3799
3800         // unmap
3801         {
3802                 deUint64                startTime;
3803                 deUint64                endTime;
3804                 glw::GLboolean  unmapSucceeded;
3805
3806                 startTime = deGetMicroseconds();
3807                 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3808                 endTime = deGetMicroseconds();
3809
3810                 if (unmapSucceeded != GL_TRUE)
3811                         throw UnmapFailureError();
3812
3813                 result.duration.unmapDuration = endTime - startTime;
3814         }
3815
3816         result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
3817         result.duration.fitResponseDuration = result.duration.totalDuration;
3818         result.writtenSize = subdataSize;
3819 }
3820
// Draw method selector for the render cases.
enum DrawMethod
{
	DRAWMETHOD_DRAW_ARRAYS		= 0,
	DRAWMETHOD_DRAW_ELEMENTS	= 1,

	DRAWMETHOD_LAST				// sentinel: number of draw methods above
};
3828
// Target buffer selector (vertex or index buffer).
enum TargetBuffer
{
	TARGETBUFFER_VERTEX	= 0,
	TARGETBUFFER_INDEX	= 1,

	TARGETBUFFER_LAST	// sentinel: number of target buffers above
};
3836
// Buffer state selector (new or already existing buffer).
enum BufferState
{
	BUFFERSTATE_NEW			= 0,
	BUFFERSTATE_EXISTING	= 1,

	BUFFERSTATE_LAST		// sentinel: number of buffer states above
};
3844
// Upload method selector.
enum UploadMethod
{
	UPLOADMETHOD_BUFFER_DATA		= 0,
	UPLOADMETHOD_BUFFER_SUB_DATA	= 1,
	UPLOADMETHOD_MAP_BUFFER_RANGE	= 2,

	UPLOADMETHOD_LAST				// sentinel: number of upload methods above
};
3853
// Unrelated buffer selector (none or a vertex buffer).
enum UnrelatedBufferType
{
	UNRELATEDBUFFERTYPE_NONE	= 0,
	UNRELATEDBUFFERTYPE_VERTEX	= 1,

	UNRELATEDBUFFERTYPE_LAST	// sentinel: number of types above
};
3861
// Upload range selector (full or partial).
enum UploadRange
{
	UPLOADRANGE_FULL	= 0,
	UPLOADRANGE_PARTIAL	= 1,

	UPLOADRANGE_LAST	// sentinel: number of upload ranges above
};
3869
// Dimensions of a layered grid scene. Each (x, y, layer) cell is rendered
// with 6 vertices, see getLayeredGridNumVertices().
struct LayeredGridSpec
{
	int		gridWidth;		// number of cells along x
	int		gridHeight;		// number of cells along y
	int		gridLayers;		// number of cells stacked on each (x, y) position
};
3876
3877 static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
3878 {
3879         return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
3880 }
3881
3882 static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
3883 {
3884         // interleave color & vertex data
3885         const tcu::Vec4 green   (0.0f, 1.0f, 0.0f, 0.7f);
3886         const tcu::Vec4 yellow  (1.0f, 1.0f, 0.0f, 0.8f);
3887
3888         vertexData.resize(getLayeredGridNumVertices(scene) * 2);
3889
3890         for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
3891         for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
3892         for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
3893         {
3894                 const tcu::Vec4 color           = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
3895                 const float             cellLeft        = (float(cellX  ) / (float)scene.gridWidth  - 0.5f) * 2.0f;
3896                 const float             cellRight       = (float(cellX+1) / (float)scene.gridWidth  - 0.5f) * 2.0f;
3897                 const float             cellTop         = (float(cellY+1) / (float)scene.gridHeight - 0.5f) * 2.0f;
3898                 const float             cellBottom      = (float(cellY  ) / (float)scene.gridHeight - 0.5f) * 2.0f;
3899
3900                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  0] = color;
3901                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3902
3903                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  2] = color;
3904                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
3905
3906                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  4] = color;
3907                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3908
3909                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  6] = color;
3910                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3911
3912                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  8] = color;
3913                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3914
3915                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color;
3916                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
3917         }
3918 }
3919
3920 static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
3921 {
3922         indexData.resize(getLayeredGridNumVertices(scene) * 2);
3923
3924         for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
3925                 indexData[ndx] = ndx;
3926 }
3927
3928 class RenderPerformanceTestBase : public TestCase
3929 {
3930 public:
3931                                                         RenderPerformanceTestBase       (Context& context, const char* name, const char* description);
3932                                                         ~RenderPerformanceTestBase      (void);
3933
3934 protected:
3935         void                                    init                                            (void);
3936         void                                    deinit                                          (void);
3937
3938         void                                    waitGLResults                           (void) const;
3939         void                                    setupVertexAttribs                      (void) const;
3940
3941         enum
3942         {
3943                 RENDER_AREA_SIZE = 128
3944         };
3945
3946 private:
3947         glu::ShaderProgram*             m_renderProgram;
3948         int                                             m_colorLoc;
3949         int                                             m_positionLoc;
3950 };
3951
3952 RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
3953         : TestCase                      (context, tcu::NODETYPE_PERFORMANCE, name, description)
3954         , m_renderProgram       (DE_NULL)
3955         , m_colorLoc            (0)
3956         , m_positionLoc         (0)
3957 {
3958 }
3959
3960 RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
3961 {
3962         deinit();
3963 }
3964
3965 void RenderPerformanceTestBase::init (void)
3966 {
3967         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3968
3969         m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));
3970         if (!m_renderProgram->isOk())
3971         {
3972                 m_testCtx.getLog() << *m_renderProgram;
3973                 throw tcu::TestError("could not build program");
3974         }
3975
3976         m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
3977         m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
3978
3979         if (m_colorLoc == -1)
3980                 throw tcu::TestError("Location of attribute a_color was -1");
3981         if (m_positionLoc == -1)
3982                 throw tcu::TestError("Location of attribute a_position was -1");
3983 }
3984
3985 void RenderPerformanceTestBase::deinit (void)
3986 {
3987         delete m_renderProgram;
3988         m_renderProgram = DE_NULL;
3989 }
3990
3991 void RenderPerformanceTestBase::setupVertexAttribs (void) const
3992 {
3993         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3994
3995         // buffers are bound
3996
3997         gl.enableVertexAttribArray(m_colorLoc);
3998         gl.enableVertexAttribArray(m_positionLoc);
3999
4000         gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0);
4001         gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1);
4002
4003         gl.useProgram(m_renderProgram->getProgram());
4004
4005         GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4006 }
4007
4008 void RenderPerformanceTestBase::waitGLResults (void) const
4009 {
4010         tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4011         glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
4012 }
4013
4014 template <typename SampleType>
4015 class RenderCase : public RenderPerformanceTestBase
4016 {
4017 public:
4018                                                                         RenderCase                                              (Context& context, const char* name, const char* description, DrawMethod drawMethod);
4019                                                                         ~RenderCase                                             (void);
4020
4021 protected:
4022         void                                                    init                                                    (void);
4023         void                                                    deinit                                                  (void);
4024
4025 private:
4026         IterateResult                                   iterate                                                 (void);
4027
4028 protected:
4029         struct SampleResult
4030         {
4031                 LayeredGridSpec                                 scene;
4032                 RenderSampleResult<SampleType>  result;
4033         };
4034
4035         int                                                             getMinWorkloadSize                              (void) const;
4036         int                                                             getMaxWorkloadSize                              (void) const;
4037         int                                                             getMinWorkloadDataSize                  (void) const;
4038         int                                                             getMaxWorkloadDataSize                  (void) const;
4039         int                                                             getVertexDataSize                               (void) const;
4040         int                                                             getNumSamples                                   (void) const;
4041         void                                                    uploadScene                                             (const LayeredGridSpec& scene);
4042
4043         virtual void                                    runSample                                               (SampleResult& sample) = 0;
4044         virtual void                                    logAndSetTestResult                             (const std::vector<SampleResult>& results);
4045
4046         void                                                    mapResultsToRenderRateFormat    (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;
4047
4048         const DrawMethod                                m_drawMethod;
4049
4050 private:
4051         glw::GLuint                                             m_attributeBufferID;
4052         glw::GLuint                                             m_indexBufferID;
4053         int                                                             m_iterationNdx;
4054         std::vector<int>                                m_iterationOrder;
4055         std::vector<SampleResult>               m_results;
4056         int                                                             m_numUnmapFailures;
4057 };
4058
4059 template <typename SampleType>
4060 RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4061         : RenderPerformanceTestBase     (context, name, description)
4062         , m_drawMethod                          (drawMethod)
4063         , m_attributeBufferID           (0)
4064         , m_indexBufferID                       (0)
4065         , m_iterationNdx                        (0)
4066         , m_numUnmapFailures            (0)
4067 {
4068         DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4069 }
4070
4071 template <typename SampleType>
4072 RenderCase<SampleType>::~RenderCase (void)
4073 {
4074         deinit();
4075 }
4076
4077 template <typename SampleType>
4078 void RenderCase<SampleType>::init (void)
4079 {
4080         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4081
4082         RenderPerformanceTestBase::init();
4083
4084         // requirements
4085
4086         if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4087                 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4088                 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
4089
4090         // gl state
4091
4092         gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4093
4094         // enable bleding to prevent grid layers from being discarded
4095         gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4096         gl.blendEquation(GL_FUNC_ADD);
4097         gl.enable(GL_BLEND);
4098
4099         // generate iterations
4100
4101         {
4102                 const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
4103
4104                 for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4105                 {
4106                         m_results.push_back(SampleResult());
4107
4108                         m_results.back().scene.gridHeight = gridSizes[gridNdx];
4109                         m_results.back().scene.gridWidth = gridSizes[gridNdx];
4110                         m_results.back().scene.gridLayers = 5;
4111
4112                         m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4113
4114                         // test cases set these, initialize to dummy values
4115                         m_results.back().result.renderDataSize = -1;
4116                         m_results.back().result.uploadedDataSize = -1;
4117                         m_results.back().result.unrelatedDataSize = -1;
4118                 }
4119         }
4120
4121         // randomize iteration order
4122         {
4123                 m_iterationOrder.resize(m_results.size());
4124                 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4125         }
4126 }
4127
4128 template <typename SampleType>
4129 void RenderCase<SampleType>::deinit (void)
4130 {
4131         RenderPerformanceTestBase::deinit();
4132
4133         if (m_attributeBufferID)
4134         {
4135                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4136                 m_attributeBufferID = 0;
4137         }
4138
4139         if (m_indexBufferID)
4140         {
4141                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4142                 m_indexBufferID = 0;
4143         }
4144 }
4145
4146 template <typename SampleType>
4147 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
4148 {
4149         const int               unmapFailureThreshold   = 3;
4150         const int               currentIteration                = m_iterationNdx;
4151         const int               currentConfigNdx                = m_iterationOrder[currentIteration];
4152         SampleResult&   currentSample                   = m_results[currentConfigNdx];
4153
4154         try
4155         {
4156                 runSample(currentSample);
4157                 ++m_iterationNdx;
4158         }
4159         catch (const UnmapFailureError& ex)
4160         {
4161                 DE_UNREF(ex);
4162                 ++m_numUnmapFailures;
4163         }
4164
4165         if (m_numUnmapFailures > unmapFailureThreshold)
4166                 throw tcu::TestError("Got too many unmap errors");
4167
4168         if (m_iterationNdx < (int)m_iterationOrder.size())
4169                 return CONTINUE;
4170
4171         logAndSetTestResult(m_results);
4172         return STOP;
4173 }
4174
4175 template <typename SampleType>
4176 int RenderCase<SampleType>::getMinWorkloadSize (void) const
4177 {
4178         int result = getLayeredGridNumVertices(m_results[0].scene);
4179
4180         for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4181         {
4182                 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4183                 result = de::min(result, workloadSize);
4184         }
4185
4186         return result;
4187 }
4188
4189 template <typename SampleType>
4190 int RenderCase<SampleType>::getMaxWorkloadSize (void) const
4191 {
4192         int result = getLayeredGridNumVertices(m_results[0].scene);
4193
4194         for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4195         {
4196                 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4197                 result = de::max(result, workloadSize);
4198         }
4199
4200         return result;
4201 }
4202
4203 template <typename SampleType>
4204 int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
4205 {
4206         return getMinWorkloadSize() * getVertexDataSize();
4207 }
4208
4209 template <typename SampleType>
4210 int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
4211 {
4212         return getMaxWorkloadSize() * getVertexDataSize();
4213 }
4214
4215 template <typename SampleType>
4216 int RenderCase<SampleType>::getVertexDataSize (void) const
4217 {
4218         const int numVectors    = 2;
4219         const int vec4Size              = 4 * sizeof(float);
4220
4221         return numVectors * vec4Size;
4222 }
4223
4224 template <typename SampleType>
4225 int RenderCase<SampleType>::getNumSamples (void) const
4226 {
4227         return (int)m_results.size();
4228 }
4229
4230 template <typename SampleType>
4231 void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
4232 {
4233         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4234
4235         // vertex buffer
4236         {
4237                 std::vector<tcu::Vec4> vertexData;
4238
4239                 generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4240
4241                 if (m_attributeBufferID == 0)
4242                         gl.genBuffers(1, &m_attributeBufferID);
4243                 gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4244                 gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4245         }
4246
4247         // index buffer
4248         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4249         {
4250                 std::vector<deUint32> indexData;
4251
4252                 generateLayeredGridIndexData(indexData, scene);
4253
4254                 if (m_indexBufferID == 0)
4255                         gl.genBuffers(1, &m_indexBufferID);
4256                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4257                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4258         }
4259
4260         GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4261 }
4262
4263 template <typename SampleType>
4264 void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
4265 {
4266         std::vector<RenderSampleResult<SampleType> > mappedResults;
4267
4268         mapResultsToRenderRateFormat(mappedResults, results);
4269
4270         {
4271                 const RenderSampleAnalyzeResult analysis        = analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4272                 const float                                             rate            = analysis.renderRateAtRange;
4273
4274                 if (rate == std::numeric_limits<float>::infinity())
4275                 {
4276                         // sample times are 1) invalid or 2) timer resolution too low
4277                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4278                 }
4279                 else
4280                 {
4281                         // report transfer rate in millions of MiB/s
4282                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4283                 }
4284         }
4285 }
4286
4287 template <typename SampleType>
4288 void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
4289 {
4290         dst.resize(src.size());
4291
4292         for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4293                 dst[ndx] = src[ndx].result;
4294 }
4295
4296 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4297 {
4298 public:
4299                         ReferenceRenderTimeCase         (Context& context, const char* name, const char* description, DrawMethod drawMethod);
4300
4301 private:
4302         void    init                                            (void);
4303         void    runSample                                       (SampleResult& sample);
4304 };
4305
4306 ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4307         : RenderCase<RenderReadDuration>        (context, name, description, drawMethod)
4308 {
4309 }
4310
4311 void ReferenceRenderTimeCase::init (void)
4312 {
4313         const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4314
4315         // init parent
4316         RenderCase<RenderReadDuration>::init();
4317
4318         // log
4319         m_testCtx.getLog()
4320                 << tcu::TestLog::Message
4321                 << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4322                 << getNumSamples() << " test samples. Sample order is randomized.\n"
4323                 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4324                 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4325                 << "Workload sizes are in the range ["
4326                         << getMinWorkloadSize() << ",  "
4327                         << getMaxWorkloadSize() << "] vertices (["
4328                         << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4329                         << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4330                 << "Test result is the approximated total processing rate in MiB / s.\n"
4331                 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4332                 << "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4333                 << tcu::TestLog::EndMessage;
4334 }
4335
4336 void ReferenceRenderTimeCase::runSample (SampleResult& sample)
4337 {
4338         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
4339         tcu::Surface                    resultSurface   (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4340         const int                               numVertices             = getLayeredGridNumVertices(sample.scene);
4341         const glu::Buffer               arrayBuffer             (m_context.getRenderContext());
4342         const glu::Buffer               indexBuffer             (m_context.getRenderContext());
4343         std::vector<tcu::Vec4>  vertexData;
4344         std::vector<deUint32>   indexData;
4345         deUint64                                startTime;
4346         deUint64                                endTime;
4347
4348         // generate and upload buffers
4349
4350         generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4351         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4352         gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4353
4354         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4355         {
4356                 generateLayeredGridIndexData(indexData, sample.scene);
4357                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4358                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4359         }
4360
4361         setupVertexAttribs();
4362
4363         // make sure data is uploaded
4364
4365         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4366                 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4367         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4368                 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4369         else
4370                 DE_ASSERT(false);
4371         waitGLResults();
4372
4373         gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4374         gl.clear(GL_COLOR_BUFFER_BIT);
4375         waitGLResults();
4376
4377         tcu::warmupCPU();
4378
4379         // Measure both draw and associated readpixels
4380         {
4381                 startTime = deGetMicroseconds();
4382
4383                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4384                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4385                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4386                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4387                 else
4388                         DE_ASSERT(false);
4389
4390                 endTime = deGetMicroseconds();
4391
4392                 sample.result.duration.renderDuration = endTime - startTime;
4393         }
4394
4395         {
4396                 startTime = deGetMicroseconds();
4397                 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4398                 endTime = deGetMicroseconds();
4399
4400                 sample.result.duration.readDuration = endTime - startTime;
4401         }
4402
4403         sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4404         sample.result.uploadedDataSize = 0;
4405         sample.result.unrelatedDataSize = 0;
4406         sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4407         sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4408         sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4409 }
4410
4411 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4412 {
4413 public:
4414                                                                         UnrelatedUploadRenderTimeCase   (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);
4415
4416 private:
4417         void                                                    init                                                    (void);
4418         void                                                    runSample                                               (SampleResult& sample);
4419
4420         const UploadMethod                              m_unrelatedUploadMethod;
4421 };
4422
4423 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
4424         : RenderCase<UnrelatedUploadRenderReadDuration> (context, name, description, drawMethod)
4425         , m_unrelatedUploadMethod                                               (unrelatedUploadMethod)
4426 {
4427         DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4428 }
4429
4430 void UnrelatedUploadRenderTimeCase::init (void)
4431 {
4432         const char* const       targetFunctionName      = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4433         tcu::MessageBuilder     message                         (&m_testCtx.getLog());
4434
4435         // init parent
4436         RenderCase<UnrelatedUploadRenderReadDuration>::init();
4437
4438         // log
4439
4440         message
4441                 << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4442                 << "Uploading an unrelated buffer just before issuing the rendering command with "
4443                         << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)               ? ("bufferData")                :
4444                                 (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)       ? ("bufferSubData")             :
4445                                 (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE)      ? ("mapBufferRange")    :
4446                                 ((const char*)DE_NULL))
4447                         << ".\n"
4448                 << getNumSamples() << " test samples. Sample order is randomized.\n"
4449                 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4450                 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4451                 << "Workload sizes are in the range ["
4452                         << getMinWorkloadSize() << ",  "
4453                         << getMaxWorkloadSize() << "] vertices (["
4454                         << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4455                         << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4456                 << "Unrelated upload sizes are in the range ["
4457                         << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4458                         << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4459                 << "Test result is the approximated total processing rate in MiB / s.\n"
4460                 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4461                 << "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4462                 << "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
4463                 << tcu::TestLog::EndMessage;
4464 }
4465
4466 void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
4467 {
4468         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
4469         tcu::Surface                    resultSurface           (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4470         const int                               numVertices                     = getLayeredGridNumVertices(sample.scene);
4471         const glu::Buffer               arrayBuffer                     (m_context.getRenderContext());
4472         const glu::Buffer               indexBuffer                     (m_context.getRenderContext());
4473         const glu::Buffer               unrelatedBuffer         (m_context.getRenderContext());
4474         int                                             unrelatedUploadSize     = -1;
4475         int                                             renderUploadSize;
4476         std::vector<tcu::Vec4>  vertexData;
4477         std::vector<deUint32>   indexData;
4478         deUint64                                startTime;
4479         deUint64                                endTime;
4480
4481         // generate and upload buffers
4482
4483         generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4484         renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4485
4486         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4487         gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4488
4489         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4490         {
4491                 generateLayeredGridIndexData(indexData, sample.scene);
4492                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4493                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4494         }
4495
4496         setupVertexAttribs();
4497
4498         // make sure data is uploaded
4499
4500         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4501                 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4502         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4503                 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4504         else
4505                 DE_ASSERT(false);
4506         waitGLResults();
4507
4508         gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4509         gl.clear(GL_COLOR_BUFFER_BIT);
4510         waitGLResults();
4511
4512         tcu::warmupCPU();
4513
4514         // Unrelated upload
4515         if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4516         {
4517                 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4518
4519                 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4520                 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4521         }
4522         else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4523         {
4524                 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4525
4526                 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4527                 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4528                 gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
4529         }
4530         else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4531         {
4532                 void*                   mapPtr;
4533                 glw::GLboolean  unmapSuccessful;
4534
4535                 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4536
4537                 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4538                 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4539
4540                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4541                 if (!mapPtr)
4542                         throw tcu::Exception("MapBufferRange returned NULL");
4543
4544                 deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
4545
4546                 // if unmapping fails, just try again later
4547                 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
4548                 if (!unmapSuccessful)
4549                         throw UnmapFailureError();
4550         }
4551         else
4552                 DE_ASSERT(false);
4553
4554         DE_ASSERT(unrelatedUploadSize != -1);
4555
4556         // Measure both draw and associated readpixels
4557         {
4558                 startTime = deGetMicroseconds();
4559
4560                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4561                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4562                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4563                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4564                 else
4565                         DE_ASSERT(false);
4566
4567                 endTime = deGetMicroseconds();
4568
4569                 sample.result.duration.renderDuration = endTime - startTime;
4570         }
4571
4572         {
4573                 startTime = deGetMicroseconds();
4574                 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4575                 endTime = deGetMicroseconds();
4576
4577                 sample.result.duration.readDuration = endTime - startTime;
4578         }
4579
4580         sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4581         sample.result.uploadedDataSize = renderUploadSize;
4582         sample.result.unrelatedDataSize = unrelatedUploadSize;
4583         sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4584         sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4585         sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4586 }
4587
4588 class ReferenceReadPixelsTimeCase : public TestCase
4589 {
4590 public:
4591                                         ReferenceReadPixelsTimeCase             (Context& context, const char* name, const char* description);
4592
4593 private:
4594         void                    init                                                    (void);
4595         IterateResult   iterate                                                 (void);
4596         void                    logAndSetTestResult                             (void);
4597
4598         enum
4599         {
4600                 RENDER_AREA_SIZE = 128
4601         };
4602
4603         const int                       m_numSamples;
4604         int                                     m_sampleNdx;
4605         std::vector<int>        m_samples;
4606 };
4607
4608 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
4609         : TestCase              (context, tcu::NODETYPE_PERFORMANCE, name, description)
4610         , m_numSamples  (20)
4611         , m_sampleNdx   (0)
4612         , m_samples             (m_numSamples)
4613 {
4614 }
4615
4616 void ReferenceReadPixelsTimeCase::init (void)
4617 {
4618         m_testCtx.getLog()
4619                 << tcu::TestLog::Message
4620                 << "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
4621                 << "Test result is the median of the samples in microseconds.\n"
4622                 << "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4623                 << tcu::TestLog::EndMessage;
4624 }
4625
4626 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
4627 {
4628         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
4629         tcu::Surface                    resultSurface   (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4630         deUint64                                startTime;
4631         deUint64                                endTime;
4632
4633         deYield();
4634         tcu::warmupCPU();
4635         deYield();
4636
4637         // "Render" something and wait for it
4638         gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
4639         gl.clear(GL_COLOR_BUFFER_BIT);
4640
4641         // wait for results
4642         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4643
4644         // measure time used in readPixels
4645         startTime = deGetMicroseconds();
4646         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4647         endTime = deGetMicroseconds();
4648
4649         m_samples[m_sampleNdx] = (int)(endTime - startTime);
4650
4651         if (++m_sampleNdx < m_numSamples)
4652                 return CONTINUE;
4653
4654         logAndSetTestResult();
4655         return STOP;
4656 }
4657
4658 void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
4659 {
4660         // Log sample list
4661         {
4662                 m_testCtx.getLog()
4663                         << tcu::TestLog::SampleList("Samples", "Samples")
4664                         << tcu::TestLog::SampleInfo
4665                         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
4666                         << tcu::TestLog::EndSampleInfo;
4667
4668                 for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
4669                         m_testCtx.getLog()
4670                                 << tcu::TestLog::Sample
4671                                 << m_samples[sampleNdx]
4672                                 << tcu::TestLog::EndSample;
4673
4674                 m_testCtx.getLog() << tcu::TestLog::EndSampleList;
4675         }
4676
4677         // Log median
4678         {
4679                 float median;
4680                 float limit60Low;
4681                 float limit60Up;
4682
4683                 std::sort(m_samples.begin(), m_samples.end());
4684                 median          = linearSample(m_samples, 0.5f);
4685                 limit60Low      = linearSample(m_samples, 0.2f);
4686                 limit60Up       = linearSample(m_samples, 0.8f);
4687
4688                 m_testCtx.getLog()
4689                         << tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
4690                         << tcu::TestLog::Message
4691                         << "60 % of samples within range:\n"
4692                         << tcu::TestLog::EndMessage
4693                         << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
4694                         << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
4695
4696                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
4697         }
4698 }
4699
// Render-time performance case in which the data sourced by the draw call is
// uploaded to the target buffer (vertex attribute or index buffer) right
// before the draw call is issued. For each sample, runSample() times the
// upload, the draw call and the following readPixels separately.
4700 template <typename SampleType>
4701 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
4702 {
4703 public:
4704         typedef typename RenderCase<SampleType>::SampleResult SampleResult;
4705
             // The draw method is forwarded to RenderCase; the remaining
             // arguments are stored in the m_* members declared below.
4706                                                         GenericUploadRenderTimeCase     (Context&                               context,
4707                                                                                                                  const char*                    name,
4708                                                                                                                  const char*                    description,
4709                                                                                                                  DrawMethod                             method,
4710                                                                                                                  TargetBuffer                   targetBuffer,
4711                                                                                                                  UploadMethod                   uploadMethod,
4712                                                                                                                  BufferState                    bufferState,
4713                                                                                                                  UploadRange                    uploadRange,
4714                                                                                                                  UnrelatedBufferType    unrelatedBufferType);
4715
4716 private:
             // init() logs the case description, runSample() measures one workload.
4717         void                                            init                                    (void);
4718         void                                            runSample                               (SampleResult& sample);
4719
             // Members of the dependent base class RenderCase<SampleType> are not
             // found by unqualified lookup inside this template, so the ones that
             // are used unqualified are brought into scope with using-declarations.
4720         using RenderCase<SampleType>::RENDER_AREA_SIZE;
4721
             // Case configuration. The declaration order matches the order of the
             // constructor's initializer list.
4722         const TargetBuffer                      m_targetBuffer;                 // buffer that receives the timed upload (vertex or index)
4723         const BufferState                       m_bufferState;                  // target buffer is new, or has already been used in rendering
4724         const UploadMethod                      m_uploadMethod;                 // bufferData, bufferSubData or mapBufferRange
4725         const UnrelatedBufferType       m_unrelatedBufferType;  // whether an unrelated vertex buffer is uploaded before the draw
4726         const UploadRange                       m_uploadRange;                  // full or partial upload
4727
4728         using RenderCase<SampleType>::m_context;
4729         using RenderCase<SampleType>::m_testCtx;
4730         using RenderCase<SampleType>::m_drawMethod;
4731 };
4732
// Constructs the case: context, name, description and draw method are
// forwarded to RenderCase, the upload configuration is stored in the members.
// Every stored enum value is asserted to be below its *_LAST sentinel.
4733 template <typename SampleType>
4734 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&                          context,
4735                                                                                                                                           const char*                   name,
4736                                                                                                                                           const char*                   description,
4737                                                                                                                                           DrawMethod                    method,
4738                                                                                                                                           TargetBuffer                  targetBuffer,
4739                                                                                                                                           UploadMethod                  uploadMethod,
4740                                                                                                                                           BufferState                   bufferState,
4741                                                                                                                                           UploadRange                   uploadRange,
4742                                                                                                                                           UnrelatedBufferType   unrelatedBufferType)
             // Initializer order follows the member declaration order in the class.
4743         : RenderCase<SampleType>        (context, name, description, method)
4744         , m_targetBuffer                        (targetBuffer)
4745         , m_bufferState                         (bufferState)
4746         , m_uploadMethod                        (uploadMethod)
4747         , m_unrelatedBufferType         (unrelatedBufferType)
4748         , m_uploadRange                         (uploadRange)
4749 {
             // Sanity-check the configuration enums against their sentinels.
4750         DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
4751         DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
4752         DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
4753         DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
4754         DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
4755 }
4756
4757 template <typename SampleType>
4758 void GenericUploadRenderTimeCase<SampleType>::init (void)
4759 {
4760         // init parent
4761         RenderCase<SampleType>::init();
4762
4763         // log
4764         {
4765                 const char* const       targetFunctionName              = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4766                 const int                       perVertexSize                   = (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(deUint32)) : ((int)sizeof(tcu::Vec4[2]));
4767                 const int                       fullMinUploadSize               = RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
4768                 const int                       fullMaxUploadSize               = RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
4769                 const int                       minUploadSize                   = (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
4770                 const int                       maxUploadSize                   = (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
4771                 const int                       minUnrelatedUploadSize  = RenderCase<SampleType>::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
4772                 const int                       maxUnrelatedUploadSize  = RenderCase<SampleType>::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
4773
4774                 m_testCtx.getLog()
4775                         << tcu::TestLog::Message
4776                         << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4777                         << "The "
4778                                 << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
4779                                 << " buffer "
4780                                 << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
4781                                 << "sourced by the rendering command "
4782                                 << ((m_bufferState == BUFFERSTATE_NEW)          ? ("is uploaded ") :
4783                                         (m_uploadRange == UPLOADRANGE_FULL)             ? ("are specified ") :
4784                                         (m_uploadRange == UPLOADRANGE_PARTIAL)  ? ("are updated (partial upload) ") :
4785                                         ((const char*)DE_NULL))
4786                                 << "just before issuing the rendering command.\n"
4787                         << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
4788                         << "Buffer "
4789                                 << ((m_bufferState == BUFFERSTATE_NEW)          ? ("is uploaded") :
4790                                         (m_uploadRange == UPLOADRANGE_FULL)             ? ("contents are specified") :
4791                                         (m_uploadRange == UPLOADRANGE_PARTIAL)  ? ("contents are partially updated") :
4792                                         ((const char*)DE_NULL))
4793                                 << " with "
4794                                 << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
4795                                 << " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
4796                         << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
4797                         << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
4798                         << RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
4799                         << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4800                         << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4801                         << "Workload sizes are in the range ["
4802                                 << RenderCase<SampleType>::getMinWorkloadSize() << ",  "
4803                                 << RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
4804                                 << "(["
4805                                 << getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
4806                                 << getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
4807                         << "Upload sizes are in the range ["
4808                                 << getHumanReadableByteSize(minUploadSize) << ","
4809                                 << getHumanReadableByteSize(maxUploadSize) << "].\n"
4810                         << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4811                                 ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
4812                                 (""))
4813                         << "Test result is the approximated processing rate in MiB / s.\n"
4814                         << "Note that while upload time is measured, the time used is not included in the results.\n"
4815                         << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
4816                         << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4817                         << "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
4818                         << tcu::TestLog::EndMessage;
4819         }
4820 }
4821
// Runs one timed sample for the workload described by sample.scene:
//   1. generates the vertex data (and index data for drawElements),
//   2. brings the buffers into the state selected by m_bufferState and
//      m_targetBuffer (optionally drawing from them once),
//   3. times the upload to the target buffer          -> uploadDuration,
//   4. optionally uploads an unrelated vertex buffer  (not timed),
//   5. times the draw call                            -> renderDuration,
//   6. times readPixels                               -> readDuration.
// Results are written to sample.result. Throws tcu::Exception if
// mapBufferRange returns NULL and UnmapFailureError if unmapBuffer fails.
4822 template <typename SampleType>
4823 void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
4824 {
4825         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
4826         const glu::Buffer               arrayBuffer                     (m_context.getRenderContext());
4827         const glu::Buffer               indexBuffer                     (m_context.getRenderContext());
4828         const glu::Buffer               unrelatedBuffer         (m_context.getRenderContext());
4829         const int                               numVertices                     = getLayeredGridNumVertices(sample.scene);
4830         tcu::Surface                    resultSurface           (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4831         deUint64                                startTime;
4832         deUint64                                endTime;
4833         std::vector<tcu::Vec4>  vertexData;
4834         std::vector<deUint32>   indexData;
4835
4836         // create data
4837
4838         generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4839         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4840                 generateLayeredGridIndexData(indexData, sample.scene);
4841
4842         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4843         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4844         RenderCase<SampleType>::setupVertexAttribs();
4845
4846         // target should be an existing buffer? Draw from it once to make sure it exists on the gpu
4847
4848         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
4849         {
4850                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
4851                 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4852         }
4853         else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
4854         {
4855                 // do not touch the vertex buffer
4856         }
4857         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
4858         {
4859                 // hint that the target buffer will be modified soon
                     // (the target buffer gets DYNAMIC_DRAW usage, the other one STATIC_DRAW)
4860                 const glw::GLenum vertexDataUsage       = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4861                 const glw::GLenum indexDataUsage        = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4862
4863                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
4864                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
4865                 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4866         }
4867         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
4868         {
                     // Only the non-target buffer is filled and used here; the target
                     // buffer itself is left without data until the timed upload.
4869                 if (m_targetBuffer == TARGETBUFFER_VERTEX)
4870                 {
4871                         // make the index buffer present on the gpu
4872                         // use another vertex buffer to keep original buffer in unused state
4873                         const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
4874
4875                         gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
4876                         RenderCase<SampleType>::setupVertexAttribs();
4877
4878                         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4879                         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4880                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4881
4882                         // restore original state
4883                         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4884                         RenderCase<SampleType>::setupVertexAttribs();
4885                 }
4886                 else if (m_targetBuffer == TARGETBUFFER_INDEX)
4887                 {
4888                         // make the vertex buffer present on the gpu
                             // (drawn with drawArrays so that the index buffer stays untouched)
4889                         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4890                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4891                 }
4892                 else
4893                         DE_ASSERT(false);
4894         }
4895         else
4896                 DE_ASSERT(false);
4897
             // Wait for the preparation work, then clear and wait again before
             // the timed section starts.
4898         RenderCase<SampleType>::waitGLResults();
4899         GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
4900
4901         gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4902         gl.clear(GL_COLOR_BUFFER_BIT);
4903         RenderCase<SampleType>::waitGLResults();
4904
4905         tcu::warmupCPU();
4906
4907         // upload
4908
4909         {
                     // Upload parameters; offset stays 0 for full-range uploads.
4910                 glw::GLenum             target;
4911                 glw::GLsizeiptr size;
4912                 glw::GLintptr   offset = 0;
4913                 const void*             source;
4914
4915                 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
4916                 {
4917                         target  = GL_ARRAY_BUFFER;
4918                         size    = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
4919                         source  = &vertexData[0];
4920                 }
4921                 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
4922                 {
4923                         target  = GL_ELEMENT_ARRAY_BUFFER;
4924                         size    = (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
4925                         source  = &indexData[0];
4926                 }
4927                 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4928                 {
4929                         DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4930
                             // size is half of the data and offset is half of size (both
                             // aligned to 4 bytes), i.e. the same 25% - 75% range as for the
                             // index buffer below
4931                         target  = GL_ARRAY_BUFFER;
4932                         size    = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
4933                         offset  = (glw::GLintptr)deAlign32((int)size / 2, 4);
4934                         source  = (const deUint8*)&vertexData[0] + offset;
4935                 }
4936                 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4937                 {
4938                         DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4939
4940                         // upload to 25% - 75% range
4941                         target  = GL_ELEMENT_ARRAY_BUFFER;
4942                         size    = (glw::GLsizeiptr)deAlign32((deInt32)(indexData.size() * sizeof(deUint32)) / 2, 4);
4943                         offset  = (glw::GLintptr)deAlign32((int)size / 2, 4);
4944                         source  = (const deUint8*)&indexData[0] + offset;
4945                 }
4946                 else
4947                 {
4948                         DE_ASSERT(false);
4949                         return;
4950                 }
4951
                     // Timed region: covers only the upload call(s) below; no wait for
                     // GL results is issued inside it.
4952                 startTime = deGetMicroseconds();
4953
4954                 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
                             // NOTE(review): with UPLOADRANGE_PARTIAL this would respecify the
                             // buffer with only `size` bytes - presumably that combination is
                             // never instantiated; confirm against the case list.
4955                         gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
4956                 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4957                 {
4958                         // create buffer storage
                             // (for a new buffer the allocation is part of the timed upload)
4959                         if (m_bufferState == BUFFERSTATE_NEW)
4960                                 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4961                         gl.bufferSubData(target, offset, size, source);
4962                 }
4963                 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4964                 {
4965                         void*                   mapPtr;
4966                         glw::GLboolean  unmapSuccessful;
4967
4968                         // create buffer storage
4969                         if (m_bufferState == BUFFERSTATE_NEW)
4970                                 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4971
                             // NOTE(review): MAP_INVALIDATE_BUFFER_BIT is set for partial
                             // uploads as well, although only [offset, offset + size) is
                             // rewritten - confirm this is intended.
4972                         mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4973                         if (!mapPtr)
4974                                 throw tcu::Exception("MapBufferRange returned NULL");
4975
4976                         deMemcpy(mapPtr, source, (int)size);
4977
4978                         // if unmapping fails, just try again later
                             // (presumably the caller catches UnmapFailureError and retries
                             // the sample - verify against RenderCase)
4979                         unmapSuccessful = gl.unmapBuffer(target);
4980                         if (!unmapSuccessful)
4981                                 throw UnmapFailureError();
4982                 }
4983                 else
4984                         DE_ASSERT(false);
4985
4986                 endTime = deGetMicroseconds();
4987
4988                 sample.result.uploadedDataSize = (int)size;
4989                 sample.result.duration.uploadDuration = endTime - startTime;
4990         }
4991
4992         // unrelated
             // Not timed: the full vertex data is uploaded to a separate buffer
             // between the timed upload and the timed draw call.
4993         if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
4994         {
4995                 const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4996
4997                 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4998                 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4999                 // Attribute pointers are not modified, no need to restore state
5000
5001                 sample.result.unrelatedDataSize = unrelatedUploadSize;
5002         }
5003
5004         // draw
5005         {
                     // Timed region: the draw call only.
5006                 startTime = deGetMicroseconds();
5007
5008                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5009                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5010                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5011                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5012                 else
5013                         DE_ASSERT(false);
5014
5015                 endTime = deGetMicroseconds();
5016
5017                 sample.result.duration.renderDuration = endTime - startTime;
5018         }
5019
5020         // read
5021         {
                     // Timed region: reading the rendered pixels back into resultSurface.
5022                 startTime = deGetMicroseconds();
5023                 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5024                 endTime = deGetMicroseconds();
5025
5026                 sample.result.duration.readDuration = endTime - startTime;
5027         }
5028
5029         // set results
5030
             // sample.result.numVertices is only read here, never written in this
             // function - presumably it is filled in by RenderCase beforehand.
5031         sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5032
             // The upload time is part of totalDuration but not of the fitted
             // response, which is render + read only.
5033         sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5034         sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
5035         sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5036 }
5037
// Render-time performance case for uploading to a buffer around draw calls.
// As described by the log written in init(), a sample consists of a first
// draw, a buffer upload, a second draw using the uploaded buffer and a
// readPixels call.
5038 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5039 {
5040 public:
             // Extra mapping flags; init() logs them as flags used together with
             // MAP_WRITE_BIT.
5041         enum MapFlags
5042         {
5043                 MAPFLAG_NONE = 0,
5044                 MAPFLAG_INVALIDATE_BUFFER,
5045                 MAPFLAG_INVALIDATE_RANGE,
5046
5047                 MAPFLAG_LAST
5048         };
             // Which buffer receives the upload. DIFFERENT_BUFFER is treated as the
             // reference case in init(): the first draw uses buffer B while the
             // upload and the second draw use buffer C. With SAME_BUFFER all three
             // steps use buffer B.
5049         enum UploadBufferTarget
5050         {
5051                 UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5052                 UPLOADBUFFERTARGET_SAME_BUFFER,
5053
5054                 UPLOADBUFFERTARGET_LAST
5055         };
             // The draw method is forwarded to RenderCase; the remaining arguments
             // are stored in the m_* members declared below.
5056                                                                 BufferInUseRenderTimeCase       (Context&                       context,
5057                                                                                                                          const char*            name,
5058                                                                                                                          const char*            description,
5059                                                                                                                          DrawMethod                     method,
5060                                                                                                                          MapFlags                       mapFlags,
5061                                                                                                                          TargetBuffer           targetBuffer,
5062                                                                                                                          UploadMethod           uploadMethod,
5063                                                                                                                          UploadRange            uploadRange,
5064                                                                                                                          UploadBufferTarget     uploadTarget);
5065
5066 private:
             // init() logs the case description, runSample() measures one workload.
5067         void                                            init                                            (void);
5068         void                                            runSample                                       (SampleResult& sample);
5069
             // Case configuration. The declaration order matches the order of the
             // constructor's initializer list.
5070         const TargetBuffer                      m_targetBuffer;
5071         const UploadMethod                      m_uploadMethod;
5072         const UploadRange                       m_uploadRange;
5073         const MapFlags                          m_mapFlags;
5074         const UploadBufferTarget        m_uploadBufferTarget;
5075 };
5076
// Constructs the case: context, name, description and draw method are
// forwarded to RenderCase, the upload configuration is stored in the members.
// Every stored enum value is asserted to be below its *_LAST sentinel.
5077 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context&                          context,
5078                                                                                                           const char*                   name,
5079                                                                                                           const char*                   description,
5080                                                                                                           DrawMethod                    method,
5081                                                                                                           MapFlags                              mapFlags,
5082                                                                                                           TargetBuffer                  targetBuffer,
5083                                                                                                           UploadMethod                  uploadMethod,
5084                                                                                                           UploadRange                   uploadRange,
5085                                                                                                           UploadBufferTarget    uploadTarget)
             // Initializer order follows the member declaration order in the class
             // (which differs from the parameter order: mapFlags comes fourth).
5086         : RenderCase<RenderUploadRenderReadDuration>    (context, name, description, method)
5087         , m_targetBuffer                                                                (targetBuffer)
5088         , m_uploadMethod                                                                (uploadMethod)
5089         , m_uploadRange                                                                 (uploadRange)
5090         , m_mapFlags                                                                    (mapFlags)
5091         , m_uploadBufferTarget                                                  (uploadTarget)
5092 {
             // Sanity-check the configuration enums against their sentinels.
5093         DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5094         DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5095         DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5096         DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5097         DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5098 }
5099
// Initializes the base RenderCase and writes a description of the case to the
// test log: the measured sequence (first draw, upload, second draw, read),
// the upload function and map flags, the number and order of samples, the
// workload range and a note on how to interpret the result.
5100 void BufferInUseRenderTimeCase::init (void)
5101 {
5102         RenderCase<RenderUploadRenderReadDuration>::init();
5103
5104         // log
5105         {
5106                 const char* const       targetFunctionName              = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5107                 const char* const       uploadFunctionName              = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
                     // Uploading to a different buffer than the one used by the first
                     // draw is the baseline (reference) variant of the test.
5108                 const bool                      isReferenceCase                 = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5109                 tcu::MessageBuilder     message                                 (&m_testCtx.getLog());
5110
5111                 message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5112                                 << targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5113
                     // Describe the operation sequence. The DE_NULL fallbacks of the
                     // conditional chains are unreachable for valid (constructor-asserted)
                     // enum values.
5114                 if (isReferenceCase)
5115                         message << "Rendering:\n"
5116                                         << "    before test: create and use buffers B and C\n"
5117                                         << "    first draw: render using buffer B\n"
5118                                         << ((m_uploadRange == UPLOADRANGE_FULL)         ? ("    upload: respecify buffer C contents\n") :
5119                                                 (m_uploadRange == UPLOADRANGE_PARTIAL)  ? ("    upload: modify buffer C contents\n")    :
5120                                                 ((const char*)DE_NULL))
5121                                         << "    second draw: render using buffer C\n"
5122                                         << "    read: readPixels\n";
5123                 else
5124                         message << "Rendering:\n"
5125                                         << "    before test: create and use buffer B\n"
5126                                         << "    first draw: render using buffer B\n"
5127                                         << ((m_uploadRange == UPLOADRANGE_FULL)         ? ("    upload: respecify buffer B contents\n") :
5128                                                 (m_uploadRange == UPLOADRANGE_PARTIAL)  ? ("    upload: modify buffer B contents\n")    :
5129                                                 ((const char*)DE_NULL))
5130                                         << "    second draw: render using buffer B\n"
5131                                         << "    read: readPixels\n";
5132
                     // Upload function, map flags (nothing is appended for MAPFLAG_NONE),
                     // sampling scheme and workload range.
5133                 message << "Uploading using " << uploadFunctionName
5134                                         << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)    ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")        :
5135                                                 (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)       ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")       :
5136                                                 (m_mapFlags == MAPFLAG_NONE)                            ? ("")                                                                                                          :
5137                                                 ((const char*)DE_NULL))
5138                                         << "\n"
5139                                 << getNumSamples() << " test samples. Sample order is randomized.\n"
5140                                 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5141                                 << "Workload sizes are in the range ["
5142                                         << getMinWorkloadSize() << ",  "
5143                                         << getMaxWorkloadSize() << "] vertices "
5144                                         << "(["
5145                                         << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5146                                         << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5147                                 << "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";
5148
                     // NOTE(review): the reference-case note has no trailing "\n" while
                     // the other branch has one - looks like an oversight; confirm before
                     // changing the logged text.
5149                 if (isReferenceCase)
5150                         message << "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
5151                 else
5152                         message << "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5153
5154                 message << tcu::TestLog::EndMessage;
5155         }
5156 }
5157
5158 void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
5159 {
5160         const glw::Functions&   gl                                              = m_context.getRenderContext().getFunctions();
5161         const glu::Buffer               arrayBuffer                             (m_context.getRenderContext());
5162         const glu::Buffer               indexBuffer                             (m_context.getRenderContext());
5163         const glu::Buffer               alternativeUploadBuffer (m_context.getRenderContext());
5164         const int                               numVertices                             = getLayeredGridNumVertices(sample.scene);
5165         tcu::Surface                    resultSurface                   (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5166         deUint64                                startTime;
5167         deUint64                                endTime;
5168         std::vector<tcu::Vec4>  vertexData;
5169         std::vector<deUint32>   indexData;
5170
5171         // create data
5172
5173         generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5174         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5175                 generateLayeredGridIndexData(indexData, sample.scene);
5176
5177         // make buffers used
5178
5179         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5180         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5181         setupVertexAttribs();
5182
5183         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5184         {
5185                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5186                 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5187         }
5188         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5189         {
5190                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5191                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5192                 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5193         }
5194         else
5195                 DE_ASSERT(false);
5196
5197         // another pair of buffers for reference case
5198         if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5199         {
5200                 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5201                 {
5202                         gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5203                         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5204
5205                         setupVertexAttribs();
5206                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5207                 }
5208                 else if (m_targetBuffer == TARGETBUFFER_INDEX)
5209                 {
5210                         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5211                         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5212                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5213                 }
5214                 else
5215                         DE_ASSERT(false);
5216
5217                 // restore state
5218                 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5219                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5220                 setupVertexAttribs();
5221         }
5222
5223         waitGLResults();
5224         GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5225
5226         gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5227         gl.clear(GL_COLOR_BUFFER_BIT);
5228         waitGLResults();
5229
5230         tcu::warmupCPU();
5231
5232         // first draw
5233         {
5234                 startTime = deGetMicroseconds();
5235
5236                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5237                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5238                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5239                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5240                 else
5241                         DE_ASSERT(false);
5242
5243                 endTime = deGetMicroseconds();
5244
5245                 sample.result.duration.firstRenderDuration = endTime - startTime;
5246         }
5247
5248         // upload
5249         {
5250                 glw::GLenum             target;
5251                 glw::GLsizeiptr size;
5252                 glw::GLintptr   offset = 0;
5253                 const void*             source;
5254
5255                 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5256                 {
5257                         target  = GL_ARRAY_BUFFER;
5258                         size    = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5259                         source  = &vertexData[0];
5260                 }
5261                 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5262                 {
5263                         target  = GL_ELEMENT_ARRAY_BUFFER;
5264                         size    = (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
5265                         source  = &indexData[0];
5266                 }
5267                 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5268                 {
5269                         target  = GL_ARRAY_BUFFER;
5270                         size    = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5271                         offset  = (glw::GLintptr)deAlign32((int)size / 2, 4);
5272                         source  = (const deUint8*)&vertexData[0] + offset;
5273                 }
5274                 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5275                 {
5276                         // upload to 25% - 75% range
5277                         target  = GL_ELEMENT_ARRAY_BUFFER;
5278                         size    = (glw::GLsizeiptr)deAlign32((deInt32)(indexData.size() * sizeof(deUint32)) / 2, 4);
5279                         offset  = (glw::GLintptr)deAlign32((int)size / 2, 4);
5280                         source  = (const deUint8*)&indexData[0] + offset;
5281                 }
5282                 else
5283                 {
5284                         DE_ASSERT(false);
5285                         return;
5286                 }
5287
5288                 // reference case? don't modify the buffer in use
5289                 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5290                         gl.bindBuffer(target, *alternativeUploadBuffer);
5291
5292                 startTime = deGetMicroseconds();
5293
5294                 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5295                         gl.bufferData(target, size, source, GL_STREAM_DRAW);
5296                 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5297                         gl.bufferSubData(target, offset, size, source);
5298                 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5299                 {
5300                         const int               mapFlags        = (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)     ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)     :
5301                                                                                   (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)      ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)      :
5302                                                                                   (-1);
5303                         void*                   mapPtr;
5304                         glw::GLboolean  unmapSuccessful;
5305
5306                         mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5307                         if (!mapPtr)
5308                                 throw tcu::Exception("MapBufferRange returned NULL");
5309
5310                         deMemcpy(mapPtr, source, (int)size);
5311
5312                         // if unmapping fails, just try again later
5313                         unmapSuccessful = gl.unmapBuffer(target);
5314                         if (!unmapSuccessful)
5315                                 throw UnmapFailureError();
5316                 }
5317                 else
5318                         DE_ASSERT(false);
5319
5320                 endTime = deGetMicroseconds();
5321
5322                 sample.result.uploadedDataSize = (int)size;
5323                 sample.result.duration.uploadDuration = endTime - startTime;
5324         }
5325
5326         // second draw
5327         {
5328                 // Source vertex data from alternative buffer in refernce case
5329                 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5330                         setupVertexAttribs();
5331
5332                 startTime = deGetMicroseconds();
5333
5334                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5335                         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5336                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5337                         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5338                 else
5339                         DE_ASSERT(false);
5340
5341                 endTime = deGetMicroseconds();
5342
5343                 sample.result.duration.secondRenderDuration = endTime - startTime;
5344         }
5345
5346         // read
5347         {
5348                 startTime = deGetMicroseconds();
5349                 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5350                 endTime = deGetMicroseconds();
5351
5352                 sample.result.duration.readDuration = endTime - startTime;
5353         }
5354
5355         // set results
5356
5357         sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5358
5359         sample.result.duration.renderReadDuration       = sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5360         sample.result.duration.totalDuration            = sample.result.duration.firstRenderDuration +
5361                                                                                                   sample.result.duration.uploadDuration +
5362                                                                                                   sample.result.duration.secondRenderDuration +
5363                                                                                                   sample.result.duration.readDuration;
5364         sample.result.duration.fitResponseDuration      = sample.result.duration.renderReadDuration;
5365 }
5366
5367 class UploadWaitDrawCase : public RenderPerformanceTestBase
5368 {
5369 public:
5370         struct Sample
5371         {
5372                 int                     numFrames;
5373                 deUint64        uploadCallEndTime;
5374         };
5375         struct Result
5376         {
5377                 deUint64        uploadDuration;
5378                 deUint64        renderDuration;
5379                 deUint64        readDuration;
5380                 deUint64        renderReadDuration;
5381
5382                 deUint64        timeBeforeUse;
5383         };
5384
5385                                                         UploadWaitDrawCase                              (Context&               context,
5386                                                                                                                          const char*    name,
5387                                                                                                                          const char*    description,
5388                                                                                                                          DrawMethod             drawMethod,
5389                                                                                                                          TargetBuffer   targetBuffer,
5390                                                                                                                          UploadMethod   uploadMethod,
5391                                                                                                                          BufferState    bufferState);
5392                                                         ~UploadWaitDrawCase                             (void);
5393
5394 private:
5395         void                                    init                                                    (void);
5396         void                                    deinit                                                  (void);
5397         IterateResult                   iterate                                                 (void);
5398
5399         void                                    uploadBuffer                                    (Sample& sample, Result& result);
5400         void                                    drawFromBuffer                                  (Sample& sample, Result& result);
5401         void                                    reuseAndDeleteBuffer                    (void);
5402         void                                    logAndSetTestResult                             (void);
5403         void                                    logSamples                                              (void);
5404         void                                    drawMisc                                                (void);
5405         int                                             findStabilizationSample                 (deUint64 (Result::*target), const char* description);
5406         bool                                    checkSampleTemporalStability    (deUint64 (Result::*target), const char* description);
5407
5408         const DrawMethod                m_drawMethod;
5409         const TargetBuffer              m_targetBuffer;
5410         const UploadMethod              m_uploadMethod;
5411         const BufferState               m_bufferState;
5412
5413         const int                               m_numSamplesPerSwap;
5414         const int                               m_numMaxSwaps;
5415
5416         int                                             m_frameNdx;
5417         int                                             m_sampleNdx;
5418         int                                             m_numVertices;
5419
5420         std::vector<tcu::Vec4>  m_vertexData;
5421         std::vector<deUint32>   m_indexData;
5422         std::vector<Sample>             m_samples;
5423         std::vector<Result>             m_results;
5424         std::vector<int>                m_iterationOrder;
5425
5426         deUint32                                m_vertexBuffer;
5427         deUint32                                m_indexBuffer;
5428         deUint32                                m_miscBuffer;
5429         int                                             m_numMiscVertices;
5430 };
5431
5432 UploadWaitDrawCase::UploadWaitDrawCase (Context&                context,
5433                                                                                 const char*             name,
5434                                                                                 const char*             description,
5435                                                                                 DrawMethod              drawMethod,
5436                                                                                 TargetBuffer    targetBuffer,
5437                                                                                 UploadMethod    uploadMethod,
5438                                                                                 BufferState             bufferState)
5439         : RenderPerformanceTestBase     (context, name, description)
5440         , m_drawMethod                          (drawMethod)
5441         , m_targetBuffer                        (targetBuffer)
5442         , m_uploadMethod                        (uploadMethod)
5443         , m_bufferState                         (bufferState)
5444         , m_numSamplesPerSwap           (10)
5445         , m_numMaxSwaps                         (4)
5446         , m_frameNdx                            (0)
5447         , m_sampleNdx                           (0)
5448         , m_numVertices                         (-1)
5449         , m_vertexBuffer                        (0)
5450         , m_indexBuffer                         (0)
5451         , m_miscBuffer                          (0)
5452         , m_numMiscVertices                     (-1)
5453 {
5454 }
5455
5456 UploadWaitDrawCase::~UploadWaitDrawCase (void)
5457 {
5458         deinit();
5459 }
5460
5461 void UploadWaitDrawCase::init (void)
5462 {
5463         const glw::Functions&   gl                                              = m_context.getRenderContext().getFunctions();
5464         const int                               vertexAttribSize                = (int)sizeof(tcu::Vec4) * 2; // color4, position4
5465         const int                               vertexIndexSize                 = (int)sizeof(deUint32);
5466         const int                               vertexUploadDataSize    = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5467
5468         RenderPerformanceTestBase::init();
5469
5470         // requirements
5471
5472         if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5473                 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5474                 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
5475
5476         // gl state
5477
5478         gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5479
5480         // enable bleding to prevent grid layers from being discarded
5481
5482         gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5483         gl.blendEquation(GL_FUNC_ADD);
5484         gl.enable(GL_BLEND);
5485
5486         // scene
5487
5488         {
5489                 LayeredGridSpec scene;
5490
5491                 // create ~8MB workload with similar characteristics as in the other test
5492                 // => makes comparison to other results more straightforward
5493                 scene.gridWidth = 93;
5494                 scene.gridHeight = 93;
5495                 scene.gridLayers = 5;
5496
5497                 generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5498                 generateLayeredGridIndexData(m_indexData, scene);
5499                 m_numVertices = getLayeredGridNumVertices(scene);
5500         }
5501
5502         // buffers
5503
5504         if (m_bufferState == BUFFERSTATE_NEW)
5505         {
5506                 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5507                 {
5508                         // reads from two buffers, prepare the static buffer
5509
5510                         if (m_targetBuffer == TARGETBUFFER_VERTEX)
5511                         {
5512                                 // index buffer is static, use another vertex buffer to keep original buffer in unused state
5513                                 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5514
5515                                 gl.genBuffers(1, &m_indexBuffer);
5516                                 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5517                                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5518                                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5519                                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);
5520
5521                                 setupVertexAttribs();
5522                                 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5523                         }
5524                         else if (m_targetBuffer == TARGETBUFFER_INDEX)
5525                         {
5526                                 // vertex buffer is static
5527                                 gl.genBuffers(1, &m_vertexBuffer);
5528                                 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5529                                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5530
5531                                 setupVertexAttribs();
5532                                 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5533                         }
5534                         else
5535                                 DE_ASSERT(false);
5536                 }
5537         }
5538         else if (m_bufferState == BUFFERSTATE_EXISTING)
5539         {
5540                 const glw::GLenum vertexUsage   = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5541                 const glw::GLenum indexUsage    = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5542
5543                 gl.genBuffers(1, &m_vertexBuffer);
5544                 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5545                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);
5546
5547                 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5548                 {
5549                         gl.genBuffers(1, &m_indexBuffer);
5550                         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5551                         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
5552                 }
5553
5554                 setupVertexAttribs();
5555
5556                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5557                         gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5558                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5559                         gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5560                 else
5561                         DE_ASSERT(false);
5562         }
5563         else
5564                 DE_ASSERT(false);
5565
5566         // misc draw buffer
5567         {
5568                 std::vector<tcu::Vec4>  vertexData;
5569                 LayeredGridSpec                 scene;
5570
5571                 // create ~1.5MB workload with similar characteristics
5572                 scene.gridWidth = 40;
5573                 scene.gridHeight = 40;
5574                 scene.gridLayers = 5;
5575
5576                 generateLayeredGridVertexAttribData4C4V(vertexData, scene);
5577
5578                 gl.genBuffers(1, &m_miscBuffer);
5579                 gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5580                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);
5581
5582                 m_numMiscVertices = getLayeredGridNumVertices(scene);
5583         }
5584
5585         // iterations
5586         {
5587                 m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5588                 m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5589
5590                 for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
5591                 for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
5592                 {
5593                         const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;
5594
5595                         m_samples[index].numFrames = numSwaps;
5596                 }
5597
5598                 m_iterationOrder.resize(m_samples.size());
5599                 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
5600         }
5601
5602         // log
5603         m_testCtx.getLog()
5604                 << tcu::TestLog::Message
5605                 << "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
5606                 << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
5607                 << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
5608                 << "Uploading using "
5609                         << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)                ? ("bufferData")                                                                                                                                                                                        :
5610                                 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)        ? ("bufferSubData")                                                                                                                                                                                     :
5611                                 (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)       ? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")       :
5612                                 ((const char*)DE_NULL))
5613                         << "\n"
5614                 << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
5615                 << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
5616                 << "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
5617                 << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
5618                 << tcu::TestLog::EndMessage;
5619 }
5620
5621 void UploadWaitDrawCase::deinit (void)
5622 {
5623         RenderPerformanceTestBase::deinit();
5624
5625         if (m_vertexBuffer)
5626         {
5627                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
5628                 m_vertexBuffer = 0;
5629         }
5630         if (m_indexBuffer)
5631         {
5632                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
5633                 m_indexBuffer = 0;
5634         }
5635         if (m_miscBuffer)
5636         {
5637                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
5638                 m_miscBuffer = 0;
5639         }
5640 }
5641
5642 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
5643 {
5644         const glw::Functions&   gl                                                              = m_context.getRenderContext().getFunctions();
5645         const int                               betweenIterationDummyFrameCount = 5; // draw misc between test samples
5646         const int                               frameNdx                                                = m_frameNdx++;
5647         const int                               currentSampleNdx                                = m_iterationOrder[m_sampleNdx];
5648
5649         // Simulate work for about 8ms
5650         busyWait(8000);
5651
5652         // Dummy rendering during dummy frames
5653         if (frameNdx != m_samples[currentSampleNdx].numFrames)
5654         {
5655                 // draw similar from another buffer
5656                 drawMisc();
5657         }
5658
5659         if (frameNdx == 0)
5660         {
5661                 // upload and start the clock
5662                 uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5663         }
5664
5665         if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
5666         {
5667                 // draw using the uploaded buffer
5668                 drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5669
5670                 // re-use buffer for something else to make sure test iteration do not affect each other
5671                 if (m_bufferState == BUFFERSTATE_NEW)
5672                         reuseAndDeleteBuffer();
5673         }
5674         else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount)
5675         {
5676                 // next sample
5677                 ++m_sampleNdx;
5678                 m_frameNdx = 0;
5679         }
5680
5681         GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
5682
5683         if (m_sampleNdx < (int)m_samples.size())
5684                 return CONTINUE;
5685
5686         logAndSetTestResult();
5687         return STOP;
5688 }
5689
5690 void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
5691 {
5692         const glw::Functions&   gl                      = m_context.getRenderContext().getFunctions();
5693         deUint64                                startTime;
5694         deUint64                                endTime;
5695         glw::GLenum                             target;
5696         glw::GLsizeiptr                 size;
5697         const void*                             source;
5698
5699         // data source
5700
5701         if (m_targetBuffer == TARGETBUFFER_VERTEX)
5702         {
5703                 DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5704
5705                 target  = GL_ARRAY_BUFFER;
5706                 size    = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
5707                 source  = &m_vertexData[0];
5708         }
5709         else if (m_targetBuffer == TARGETBUFFER_INDEX)
5710         {
5711                 DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5712
5713                 target  = GL_ELEMENT_ARRAY_BUFFER;
5714                 size    = (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
5715                 source  = &m_indexData[0];
5716         }
5717         else
5718         {
5719                 DE_ASSERT(false);
5720                 return;
5721         }
5722
5723         // gen buffer
5724
5725         if (m_bufferState == BUFFERSTATE_NEW)
5726         {
5727                 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5728                 {
5729                         gl.genBuffers(1, &m_vertexBuffer);
5730                         gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5731                 }
5732                 else if (m_targetBuffer == TARGETBUFFER_INDEX)
5733                 {
5734                         gl.genBuffers(1, &m_indexBuffer);
5735                         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5736                 }
5737                 else
5738                         DE_ASSERT(false);
5739
5740                 if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
5741                         m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5742                 {
5743                         gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
5744                 }
5745         }
5746         else if (m_bufferState == BUFFERSTATE_EXISTING)
5747         {
5748                 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5749                         gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5750                 else if (m_targetBuffer == TARGETBUFFER_INDEX)
5751                         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5752                 else
5753                         DE_ASSERT(false);
5754         }
5755         else
5756                 DE_ASSERT(false);
5757
5758         // upload
5759
5760         startTime = deGetMicroseconds();
5761
5762         if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5763                 gl.bufferData(target, size, source, GL_STATIC_DRAW);
5764         else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5765                 gl.bufferSubData(target, 0, size, source);
5766         else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5767         {
5768                 void*                   mapPtr;
5769                 glw::GLboolean  unmapSuccessful;
5770
5771                 mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
5772                 if (!mapPtr)
5773                         throw tcu::Exception("MapBufferRange returned NULL");
5774
5775                 deMemcpy(mapPtr, source, (int)size);
5776
5777                 // if unmapping fails, just try again later
5778                 unmapSuccessful = gl.unmapBuffer(target);
5779                 if (!unmapSuccessful)
5780                         throw UnmapFailureError();
5781         }
5782         else
5783                 DE_ASSERT(false);
5784
5785         endTime = deGetMicroseconds();
5786
5787         sample.uploadCallEndTime = endTime;
5788         result.uploadDuration = endTime - startTime;
5789 }
5790
5791 void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
5792 {
5793         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
5794         tcu::Surface                    resultSurface   (RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5795         deUint64                                startTime;
5796         deUint64                                endTime;
5797
5798         DE_ASSERT(m_vertexBuffer != 0);
5799         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5800                 DE_ASSERT(m_indexBuffer == 0);
5801         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5802                 DE_ASSERT(m_indexBuffer != 0);
5803         else
5804                 DE_ASSERT(false);
5805
5806         // draw
5807         {
5808                 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5809                 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5810                         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5811
5812                 setupVertexAttribs();
5813
5814                 // microseconds passed since return from upload call
5815                 result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
5816
5817                 startTime = deGetMicroseconds();
5818
5819                 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5820                         gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5821                 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5822                         gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5823                 else
5824                         DE_ASSERT(false);
5825
5826                 endTime = deGetMicroseconds();
5827
5828                 result.renderDuration = endTime - startTime;
5829         }
5830
5831         // read
5832         {
5833                 startTime = deGetMicroseconds();
5834                 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5835                 endTime = deGetMicroseconds();
5836
5837                 result.readDuration = endTime - startTime;
5838         }
5839
5840         result.renderReadDuration = result.renderDuration + result.readDuration;
5841 }
5842
5843 void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
5844 {
5845         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5846
5847         if (m_targetBuffer == TARGETBUFFER_INDEX)
5848         {
5849                 // respecify and delete index buffer
5850                 static const deUint32 indices[3] = {1, 3, 8};
5851
5852                 DE_ASSERT(m_indexBuffer != 0);
5853
5854                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
5855                 gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
5856                 gl.deleteBuffers(1, &m_indexBuffer);
5857                 m_indexBuffer = 0;
5858         }
5859         else if (m_targetBuffer == TARGETBUFFER_VERTEX)
5860         {
5861                 // respecify and delete vertex buffer
5862                 static const tcu::Vec4 coloredTriangle[6] =
5863                 {
5864                         tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
5865                         tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
5866                         tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
5867                 };
5868
5869                 DE_ASSERT(m_vertexBuffer != 0);
5870
5871                 gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
5872                 gl.drawArrays(GL_TRIANGLES, 0, 3);
5873                 gl.deleteBuffers(1, &m_vertexBuffer);
5874                 m_vertexBuffer = 0;
5875         }
5876
5877         waitGLResults();
5878 }
5879
5880 void UploadWaitDrawCase::logAndSetTestResult (void)
5881 {
5882         int             uploadStabilization;
5883         int             renderReadStabilization;
5884         int             renderStabilization;
5885         int             readStabilization;
5886         bool    temporallyStable;
5887
5888         {
5889                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
5890                 logSamples();
5891         }
5892
5893         {
5894                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
5895
5896                 // log stabilization points
5897                 renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
5898                 uploadStabilization             = findStabilizationSample(&Result::uploadDuration, "Upload time");
5899                 renderStabilization             = findStabilizationSample(&Result::renderDuration, "Draw call time");
5900                 readStabilization               = findStabilizationSample(&Result::readDuration, "ReadPixels time");
5901
5902                 temporallyStable                = true;
5903                 temporallyStable                &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
5904                 temporallyStable                &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
5905                 temporallyStable                &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
5906                 temporallyStable                &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
5907         }
5908
5909         {
5910                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
5911
5912                 // Check result sanily
5913                 if (uploadStabilization != 0)
5914                         m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
5915                 if (!temporallyStable)
5916                         m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;
5917
5918                 // render & read
5919                 if (renderReadStabilization == -1)
5920                         m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5921                 else
5922                         m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);
5923
5924                 // draw call
5925                 if (renderStabilization == -1)
5926                         m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
5927                 else
5928                         m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);
5929
5930                 // readpixels
5931                 if (readStabilization == -1)
5932                         m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5933                 else
5934                         m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);
5935
5936                 // Report renderReadStabilization
5937                 if (renderReadStabilization != -1)
5938                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
5939                 else
5940                         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
5941         }
5942 }
5943
5944 void UploadWaitDrawCase::logSamples (void)
5945 {
5946         // Inverse m_iterationOrder
5947
5948         std::vector<int> runOrder(m_iterationOrder.size());
5949         for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
5950                 runOrder[m_iterationOrder[ndx]] = ndx;
5951
5952         // Log samples
5953
5954         m_testCtx.getLog()
5955                 << tcu::TestLog::SampleList("Samples", "Samples")
5956                 << tcu::TestLog::SampleInfo
5957                 << tcu::TestLog::ValueInfo("NumSwaps",          "SwapBuffers before use",                       "",             QP_SAMPLE_VALUE_TAG_PREDICTOR)
5958                 << tcu::TestLog::ValueInfo("Delay",                     "Time before use",                                      "us",   QP_SAMPLE_VALUE_TAG_PREDICTOR)
5959                 << tcu::TestLog::ValueInfo("RunOrder",          "Sample run order",                                     "",             QP_SAMPLE_VALUE_TAG_PREDICTOR)
5960                 << tcu::TestLog::ValueInfo("DrawReadTime",      "Draw call and ReadPixels time",        "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
5961                 << tcu::TestLog::ValueInfo("TotalTime",         "Total time",                                           "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
5962                 << tcu::TestLog::ValueInfo("Upload time",       "Upload time",                                          "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
5963                 << tcu::TestLog::ValueInfo("DrawCallTime",      "Draw call time",                                       "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
5964                 << tcu::TestLog::ValueInfo("ReadTime",          "ReadPixels time",                                      "us",   QP_SAMPLE_VALUE_TAG_RESPONSE)
5965                 << tcu::TestLog::EndSampleInfo;
5966
5967         for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5968                 m_testCtx.getLog()
5969                         << tcu::TestLog::Sample
5970                         << m_samples[sampleNdx].numFrames
5971                         << (int)m_results[sampleNdx].timeBeforeUse
5972                         << runOrder[sampleNdx]
5973                         << (int)m_results[sampleNdx].renderReadDuration
5974                         << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
5975                         << (int)m_results[sampleNdx].uploadDuration
5976                         << (int)m_results[sampleNdx].renderDuration
5977                         << (int)m_results[sampleNdx].readDuration
5978                         << tcu::TestLog::EndSample;
5979
5980         m_testCtx.getLog() << tcu::TestLog::EndSampleList;
5981 }
5982
5983 void UploadWaitDrawCase::drawMisc (void)
5984 {
5985         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5986
5987         gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5988         setupVertexAttribs();
5989         gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
5990 }
5991
//! Outcome of comparing two sample distributions with distributionCompare().
struct DistributionCompareResult
{
	//! True when the distributions are considered equal (|z| within the accepted interval).
	bool	equal;

	//! z-score of the test statistic, i.e. its distance from the expected value in standard deviations.
	float	standardDeviations;
};
5997
5998 template <typename Comparer>
5999 static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
6000 {
6001         float sum = 0;
6002
6003         for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
6004         {
6005                 const deUint64  testSample              = testSamples[sampleNdx];
6006                 const int               lowerIndex              = (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6007                 const int               upperIndex              = (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6008                 const int               lowerRank               = lowerIndex + 1;       // convert zero-indexed to rank
6009                 const int               upperRank               = upperIndex;           // convert zero-indexed to rank, upperIndex is last equal + 1
6010                 const float             rankMidpoint    = (float)(lowerRank + upperRank) / 2.0f;
6011
6012                 sum += rankMidpoint;
6013         }
6014
6015         return sum;
6016 }
6017
6018 template <typename Comparer>
6019 static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
6020 {
6021         // Mann-Whitney U test
6022
6023         const int                               n1                      = (int)orderedObservationsA.size();
6024         const int                               n2                      = (int)orderedObservationsB.size();
6025         std::vector<deUint64>   allSamples      (n1 + n2);
6026
6027         std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6028         std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6029         std::sort(allSamples.begin(), allSamples.end());
6030
6031         {
6032                 const float                                     R1              = sumOfRanks(orderedObservationsA, allSamples, comparer);
6033
6034                 const float                                     U1              = (float)(n1*n2 + n1*(n1 + 1)/2) - R1;
6035                 const float                                     U2              = (float)(n1 * n2) - U1;
6036                 const float                                     U               = de::min(U1, U2);
6037
6038                 // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6039
6040                 const float                                     mU              = (float)(n1 * n2) / 2.0f;
6041                 const float                                     sigmaU  = deFloatSqrt((float)(n1*n2*(n1+n2+1)) / 12.0f);
6042                 const float                                     z               = (U - mU) / sigmaU;
6043
6044                 DistributionCompareResult       result;
6045
6046                 result.equal                            = (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6047                 result.standardDeviations       = z;
6048
6049                 return result;
6050         }
6051 }
6052
6053 template <typename T>
6054 struct ThresholdComparer
6055 {
6056         float   relativeThreshold;
6057         T               absoluteThreshold;
6058
6059         bool operator() (const T& a, const T& b) const
6060         {
6061                 const float diff = de::abs((float)a - (float)b);
6062
6063                 // thresholds
6064                 if (diff <= (float)absoluteThreshold)
6065                         return false;
6066                 if (diff <= float(a)*relativeThreshold ||
6067                         diff <= float(b)*relativeThreshold)
6068                         return false;
6069
6070                 // cmp
6071                 return a < b;
6072         }
6073 };
6074
6075 int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6076 {
6077         std::vector<std::vector<deUint64> >     sampleObservations(m_numMaxSwaps+1);
6078         ThresholdComparer<deUint64>                     comparer;
6079
6080         comparer.relativeThreshold = 0.15f;     // 15%
6081         comparer.absoluteThreshold = 100;       // (us), assumed sampling precision
6082
6083         // get observations and order them
6084
6085         for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6086         {
6087                 int insertNdx = 0;
6088
6089                 sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6090
6091                 for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6092                         if (m_samples[ndx].numFrames == swapNdx)
6093                                 sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6094
6095                 DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6096
6097                 std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6098         }
6099
6100         // find stabilization point
6101
6102         for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx )
6103         {
6104                 // Distribution is equal to all following distributions
6105                 for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6106                 {
6107                         // Stable section ends here?
6108                         const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6109                         if (!result.equal)
6110                         {
6111                                 // Last two samples are not equal? Samples never stabilized
6112                                 if (sampleNdx == m_numMaxSwaps-1)
6113                                 {
6114                                         m_testCtx.getLog()
6115                                                 << tcu::TestLog::Message
6116                                                 << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6117                                                 << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6118                                                 << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6119                                                 << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6120                                                 << tcu::TestLog::EndMessage;
6121                                         return -1;
6122                                 }
6123                                 else
6124                                 {
6125                                         m_testCtx.getLog()
6126                                                 << tcu::TestLog::Message
6127                                                 << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6128                                                 << "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n"
6129                                                 << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6130                                                 << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6131                                                 << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6132                                                 << tcu::TestLog::EndMessage;
6133
6134                                         return sampleNdx+1;
6135                                 }
6136                         }
6137                 }
6138         }
6139
6140         m_testCtx.getLog()
6141                 << tcu::TestLog::Message
6142                 << description << ": All samples seem to have the same distribution"
6143                 << tcu::TestLog::EndMessage;
6144
6145         // all distributions equal
6146         return 0;
6147 }
6148
6149 bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6150 {
6151         // Try to find correlation with sample order and sample times
6152
6153         const int                                               numDataPoints   = (int)m_iterationOrder.size();
6154         std::vector<tcu::Vec2>                  dataPoints              (m_iterationOrder.size());
6155         LineParametersWithConfidence    lineFit;
6156
6157         for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6158         {
6159                 dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6160                 dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6161         }
6162
6163         lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6164
6165         // Difference of more than 25% of the offset along the whole sample range
6166         if (de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f)
6167         {
6168                 m_testCtx.getLog()
6169                         << tcu::TestLog::Message
6170                         << description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
6171                         << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6172                         << tcu::TestLog::EndMessage;
6173
6174                 return false;
6175         }
6176         else
6177                 return true;
6178 }
6179
6180 } // anonymous
6181
6182 BufferDataUploadTests::BufferDataUploadTests (Context& context)
6183         : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6184 {
6185 }
6186
6187 BufferDataUploadTests::~BufferDataUploadTests (void)
6188 {
6189 }
6190
6191 void BufferDataUploadTests::init (void)
6192 {
6193         static const struct BufferUsage
6194         {
6195                 const char* name;
6196                 deUint32        usage;
6197                 bool            primaryUsage;
6198         } bufferUsages[] =
6199         {
6200                 { "stream_draw",        GL_STREAM_DRAW,         true    },
6201                 { "stream_read",        GL_STREAM_READ,         false   },
6202                 { "stream_copy",        GL_STREAM_COPY,         false   },
6203                 { "static_draw",        GL_STATIC_DRAW,         true    },
6204                 { "static_read",        GL_STATIC_READ,         false   },
6205                 { "static_copy",        GL_STATIC_COPY,         false   },
6206                 { "dynamic_draw",       GL_DYNAMIC_DRAW,        true    },
6207                 { "dynamic_read",       GL_DYNAMIC_READ,        false   },
6208                 { "dynamic_copy",       GL_DYNAMIC_COPY,        false   },
6209         };
6210
6211         tcu::TestCaseGroup* const referenceGroup                        = new tcu::TestCaseGroup(m_testCtx, "reference",                        "Reference functions");
6212         tcu::TestCaseGroup* const functionCallGroup                     = new tcu::TestCaseGroup(m_testCtx, "function_call",            "Function call timing");
6213         tcu::TestCaseGroup* const modifyAfterUseGroup           = new tcu::TestCaseGroup(m_testCtx, "modify_after_use",         "Function call time after buffer has been used");
6214         tcu::TestCaseGroup* const renderAfterUploadGroup        = new tcu::TestCaseGroup(m_testCtx, "render_after_upload",      "Function call time of draw commands after buffer has been modified");
6215
6216         addChild(referenceGroup);
6217         addChild(functionCallGroup);
6218         addChild(modifyAfterUseGroup);
6219         addChild(renderAfterUploadGroup);
6220
6221         // .reference
6222         {
6223                 static const struct BufferSizeRange
6224                 {
6225                         const char* name;
6226                         int                     minBufferSize;
6227                         int                     maxBufferSize;
6228                         int                     numSamples;
6229                         bool            largeBuffersCase;
6230                 } sizeRanges[] =
6231                 {
6232                         { "small_buffers", 0,           1 << 18,        64,             false   }, // !< 0kB - 256kB
6233                         { "large_buffers", 1 << 18,     1 << 24,        32,             true    }, // !< 256kB - 16MB
6234                 };
6235
6236                 for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6237                 {
6238                         referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
6239                                                                                                                          std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6240                                                                                                                          "Test memcpy performance",
6241                                                                                                                          sizeRanges[bufferSizeRangeNdx].minBufferSize,
6242                                                                                                                          sizeRanges[bufferSizeRangeNdx].maxBufferSize,
6243                                                                                                                          sizeRanges[bufferSizeRangeNdx].numSamples,
6244                                                                                                                          sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6245                 }
6246         }
6247
6248         // .function_call
6249         {
6250                 const int minBufferSize         = 0;            // !< 0kiB
6251                 const int maxBufferSize         = 1 << 24;      // !< 16MiB
6252                 const int numDataSamples        = 25;
6253                 const int numMapSamples         = 25;
6254
6255                 tcu::TestCaseGroup* const bufferDataMethodGroup         = new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6256                 tcu::TestCaseGroup* const bufferSubDataMethodGroup      = new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6257                 tcu::TestCaseGroup* const mapBufferRangeMethodGroup     = new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6258
6259                 functionCallGroup->addChild(bufferDataMethodGroup);
6260                 functionCallGroup->addChild(bufferSubDataMethodGroup);
6261                 functionCallGroup->addChild(mapBufferRangeMethodGroup);
6262
6263                 // .buffer_data
6264                 {
6265                         static const struct TargetCase
6266                         {
6267                                 tcu::TestCaseGroup*                             group;
6268                                 BufferDataUploadCase::CaseType  caseType;
6269                                 bool                                                    allUsages;
6270                         } targetCases[] =
6271                         {
6272                                 { new tcu::TestCaseGroup(m_testCtx, "new_buffer",                               "Target new buffer"),                                                   BufferDataUploadCase::CASE_NEW_BUFFER,                  true    },
6273                                 { new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",               "Target new unspecified buffer"),                               BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,  true    },
6274                                 { new tcu::TestCaseGroup(m_testCtx, "specified_buffer",                 "Target new specified buffer"),                                 BufferDataUploadCase::CASE_SPECIFIED_BUFFER,    true    },
6275                                 { new tcu::TestCaseGroup(m_testCtx, "used_buffer",                              "Target buffer that was used in draw"),                 BufferDataUploadCase::CASE_USED_BUFFER,                 true    },
6276                                 { new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",               "Target larger buffer that was used in draw"),  BufferDataUploadCase::CASE_USED_LARGER_BUFFER,  false   },
6277                         };
6278
6279                         for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6280                         {
6281                                 bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6282
6283                                 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6284                                         if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6285                                                 targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
6286                                                                                                                                                                                 std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6287                                                                                                                                                                                 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6288                                                                                                                                                                                 minBufferSize,
6289                                                                                                                                                                                 maxBufferSize,
6290                                                                                                                                                                                 numDataSamples,
6291                                                                                                                                                                                 bufferUsages[usageNdx].usage,
6292                                                                                                                                                                                 targetCases[targetNdx].caseType));
6293                         }
6294                 }
6295
6296                 // .buffer_sub_data
6297                 {
6298                         static const struct FlagCase
6299                         {
6300                                 tcu::TestCaseGroup*                                     group;
6301                                 BufferSubDataUploadCase::CaseType       parentCase;
6302                                 bool                                                            allUsages;
6303                                 int                                                                     flags;
6304                         } flagCases[] =
6305                         {
6306                                 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",                                      ""),                                                                                                                        BufferSubDataUploadCase::CASE_USED_BUFFER,      true,   BufferSubDataUploadCase::FLAG_FULL_UPLOAD                                                                                                                       },
6307                                 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),     BufferSubDataUploadCase::CASE_USED_BUFFER,      false,  BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE      },
6308                                 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),                                                                                                                        BufferSubDataUploadCase::CASE_USED_BUFFER,      true,   BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD                                                                                                            },
6309                                 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),     BufferSubDataUploadCase::CASE_USED_BUFFER,      false,  BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE      },
6310                         };
6311
6312                         for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6313                         {
6314                                 bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6315
6316                                 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6317                                         if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6318                                                         flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
6319                                                                                                                                                                                    std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6320                                                                                                                                                                                    std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6321                                                                                                                                                                                    minBufferSize,
6322                                                                                                                                                                                    maxBufferSize,
6323                                                                                                                                                                                    numDataSamples,
6324                                                                                                                                                                                    bufferUsages[usageNdx].usage,
6325                                                                                                                                                                                    flagCases[flagNdx].parentCase,
6326                                                                                                                                                                                    flagCases[flagNdx].flags));
6327                         }
6328                 }
6329
6330                 // .map_buffer_range
6331                 {
6332                         static const struct FlagCase
6333                         {
6334                                 const char*     name;
6335                                 bool            usefulForUnusedBuffers;
6336                                 bool            allUsages;
6337                                 int                     glFlags;
6338                                 int                     caseFlags;
6339                         } flagCases[] =
6340                         {
6341                                 { "flag_write_full",                                                                            true,   true,   GL_MAP_WRITE_BIT,                                                                                                                               0                                                                                                                                                               },
6342                                 { "flag_write_partial",                                                                         true,   true,   GL_MAP_WRITE_BIT,                                                                                                                               MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6343                                 { "flag_read_write_full",                                                                       true,   true,   GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,                                                                                             0                                                                                                                                                               },
6344                                 { "flag_read_write_partial",                                                            true,   true,   GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,                                                                                             MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6345                                 { "flag_invalidate_range_full",                                                         true,   false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,                                                                 0                                                                                                                                                               },
6346                                 { "flag_invalidate_range_partial",                                                      true,   false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,                                                                 MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6347                                 { "flag_invalidate_buffer_full",                                                        true,   false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,                                                                0                                                                                                                                                               },
6348                                 { "flag_invalidate_buffer_partial",                                                     true,   false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,                                                                MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6349                                 { "flag_write_full_manual_invalidate_buffer",                           false,  false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,                                                                 MapBufferRangeCase::FLAG_MANUAL_INVALIDATION                                                                    },
6350                                 { "flag_write_partial_manual_invalidate_buffer",                        false,  false,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,                                                                 MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION },
6351                                 { "flag_unsynchronized_full",                                                           true,   false,  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,                                                                   0                                                                                                                                                               },
6352                                 { "flag_unsynchronized_partial",                                                        true,   false,  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,                                                                   MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6353                                 { "flag_unsynchronized_and_invalidate_buffer_full",                     true,   false,  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,    0                                                                                                                                                               },
6354                                 { "flag_unsynchronized_and_invalidate_buffer_partial",          true,   false,  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,    MapBufferRangeCase::FLAG_PARTIAL                                                                                                },
6355                         };
6356                         static const struct FlushCases
6357                         {
6358                                 const char*     name;
6359                                 int                     glFlags;
6360                                 int                     caseFlags;
6361                         } flushCases[] =
6362                         {
6363                                 { "flag_flush_explicit_map_full",                                       GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,   0                                                                                               },
6364                                 { "flag_flush_explicit_map_partial",                            GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,   MapBufferRangeFlushCase::FLAG_PARTIAL                   },
6365                                 { "flag_flush_explicit_map_full_flush_in_parts",        GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,   MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS    },
6366                                 { "flag_flush_explicit_map_full_flush_partial",         GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,   MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL             },
6367                         };
6368                         static const struct MapTestGroup
6369                         {
6370                                 int                                     flags;
6371                                 bool                            unusedBufferCase;
6372                                 tcu::TestCaseGroup* group;
6373                         } groups[] =
6374                         {
6375                                 { MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,       true,   new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),                           },
6376                                 { MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,         true,   new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),                                       },
6377                                 { 0,                                                                                                            false,  new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")        },
6378                         };
6379
6380                         // we OR same flags to both range and flushRange cases, make sure it is legal
6381                         DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6382                         DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6383
6384                         for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6385                         {
6386                                 tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;
6387
6388                                 mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6389
6390                                 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6391                                 {
6392                                         if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6393                                                 continue;
6394
6395                                         tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6396                                         bufferTypeGroup->addChild(bufferUsageGroup);
6397
6398                                         for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6399                                                 if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6400                                                         bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
6401                                                                                                                                                           bufferUsages[usageNdx].name,
6402                                                                                                                                                           std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6403                                                                                                                                                           minBufferSize,
6404                                                                                                                                                           maxBufferSize,
6405                                                                                                                                                           numMapSamples,
6406                                                                                                                                                           bufferUsages[usageNdx].usage,
6407                                                                                                                                                           flagCases[caseNdx].glFlags,
6408                                                                                                                                                           flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6409                                 }
6410
6411                                 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6412                                 {
6413                                         tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6414                                         bufferTypeGroup->addChild(bufferUsageGroup);
6415
6416                                         for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6417                                                 if (bufferUsages[usageNdx].primaryUsage)
6418                                                         bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
6419                                                                                                                                                                    bufferUsages[usageNdx].name,
6420                                                                                                                                                                    std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6421                                                                                                                                                                    minBufferSize,
6422                                                                                                                                                                    maxBufferSize,
6423                                                                                                                                                                    numMapSamples,
6424                                                                                                                                                                    bufferUsages[usageNdx].usage,
6425                                                                                                                                                                    flushCases[caseNdx].glFlags,
6426                                                                                                                                                                    flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6427                                 }
6428                         }
6429                 }
6430         }
6431
6432         // .modify_after_use
6433         {
6434                 const int minBufferSize = 0;            // !< 0kiB
6435                 const int maxBufferSize = 1 << 24;      // !< 16MiB
6436
6437                 static const struct Usage
6438                 {
6439                         const char* name;
6440                         const char* description;
6441                         deUint32        usage;
6442                 } usages[] =
6443                 {
6444                         { "static_draw",        "Test with GL_STATIC_DRAW",             GL_STATIC_DRAW  },
6445                         { "dynamic_draw",       "Test with GL_DYNAMIC_DRAW",    GL_DYNAMIC_DRAW },
6446                         { "stream_draw",        "Test with GL_STREAM_DRAW",             GL_STREAM_DRAW },
6447
6448                 };
6449
6450                 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6451                 {
6452                         tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6453                         modifyAfterUseGroup->addChild(usageGroup);
6454
6455                         usageGroup->addChild(new ModifyAfterWithBufferDataCase          (m_context, "buffer_data",                                                      "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6456                         usageGroup->addChild(new ModifyAfterWithBufferDataCase          (m_context, "buffer_data_different_size",                       "Respecify buffer contents and size after use",                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6457                         usageGroup->addChild(new ModifyAfterWithBufferDataCase          (m_context, "buffer_data_repeated",                                     "Respecify buffer contents after upload and use",               minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6458
6459                         usageGroup->addChild(new ModifyAfterWithBufferSubDataCase       (m_context, "buffer_sub_data_full",                                     "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6460                         usageGroup->addChild(new ModifyAfterWithBufferSubDataCase       (m_context, "buffer_sub_data_partial",                          "Respecify buffer contents partially use",                              minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6461                         usageGroup->addChild(new ModifyAfterWithBufferSubDataCase       (m_context, "buffer_sub_data_full_repeated",            "Respecify buffer contents after upload and use",               minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
6462                         usageGroup->addChild(new ModifyAfterWithBufferSubDataCase       (m_context, "buffer_sub_data_partial_repeated",         "Respecify buffer contents partially upload and use",   minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6463
6464                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_write_full",                                      "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_WRITE_BIT));
6465                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_write_partial",                           "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,  GL_MAP_WRITE_BIT));
6466                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_read_write_full",                         "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6467                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_read_write_partial",                      "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,  GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6468                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_invalidate_range_full",           "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6469                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_invalidate_range_partial",        "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6470                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_invalidate_buffer_full",          "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6471                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_invalidate_buffer_partial",       "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6472                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_unsynchronized_full",                     "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6473                         usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase      (m_context, "map_flag_unsynchronized_partial",          "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6474
6475                         usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase      (m_context, "map_flag_flush_explicit_full",                     "Respecify buffer contents after use",                                  minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,                                                                                                GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6476                         usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase      (m_context, "map_flag_flush_explicit_partial",          "Respecify buffer contents partially after use",                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,  GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6477                 }
6478         }
6479
6480         // .render_after_upload
6481         {
6482                 // .reference
6483                 {
6484                         tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
6485                         renderAfterUploadGroup->addChild(renderReferenceGroup);
6486
6487                         // .draw
6488                         {
6489                                 tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
6490                                 renderReferenceGroup->addChild(drawGroup);
6491
6492                                 // Time consumed by readPixels
6493                                 drawGroup->addChild(new ReferenceReadPixelsTimeCase     (m_context, "read_pixels",              "Measure time consumed by readPixels() function call"));
6494
6495                                 // Time consumed by rendering
6496                                 drawGroup->addChild(new ReferenceRenderTimeCase         (m_context, "draw_arrays",              "Measure time consumed by drawArrays() function call",          DRAWMETHOD_DRAW_ARRAYS));
6497                                 drawGroup->addChild(new ReferenceRenderTimeCase         (m_context, "draw_elements",    "Measure time consumed by drawElements() function call",        DRAWMETHOD_DRAW_ELEMENTS));
6498                         }
6499
6500                         // .draw_upload_draw
6501                         {
6502                                 static const struct
6503                                 {
6504                                         const char*             name;
6505                                         const char*             description;
6506                                         DrawMethod              drawMethod;
6507                                         TargetBuffer    targetBuffer;
6508                                         bool                    partial;
6509                                 } uploadTargets[] =
6510                                 {
6511                                         {
6512                                                 "draw_arrays_upload_vertices",
6513                                                 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6514                                                 DRAWMETHOD_DRAW_ARRAYS,
6515                                                 TARGETBUFFER_VERTEX,
6516                                                 false
6517                                         },
6518                                         {
6519                                                 "draw_arrays_upload_vertices_partial",
6520                                                 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6521                                                 DRAWMETHOD_DRAW_ARRAYS,
6522                                                 TARGETBUFFER_VERTEX,
6523                                                 true
6524                                         },
6525                                         {
6526                                                 "draw_elements_upload_vertices",
6527                                                 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6528                                                 DRAWMETHOD_DRAW_ELEMENTS,
6529                                                 TARGETBUFFER_VERTEX,
6530                                                 false
6531                                         },
6532                                         {
6533                                                 "draw_elements_upload_indices",
6534                                                 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6535                                                 DRAWMETHOD_DRAW_ELEMENTS,
6536                                                 TARGETBUFFER_INDEX,
6537                                                 false
6538                                         },
6539                                         {
6540                                                 "draw_elements_upload_indices_partial",
6541                                                 "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6542                                                 DRAWMETHOD_DRAW_ELEMENTS,
6543                                                 TARGETBUFFER_INDEX,
6544                                                 true
6545                                         },
6546                                 };
6547                                 static const struct
6548                                 {
6549                                         const char*                                                     name;
6550                                         const char*                                                     description;
6551                                         UploadMethod                                            uploadMethod;
6552                                         BufferInUseRenderTimeCase::MapFlags     mapFlags;
6553                                         bool                                                            supportsPartialUpload;
6554                                 } uploadMethods[] =
6555                                 {
6556                                         { "buffer_data",                                                "bufferData",           UPLOADMETHOD_BUFFER_DATA,               BufferInUseRenderTimeCase::MAPFLAG_NONE,                                false   },
6557                                         { "buffer_sub_data",                                    "bufferSubData",        UPLOADMETHOD_BUFFER_SUB_DATA,   BufferInUseRenderTimeCase::MAPFLAG_NONE,                                true    },
6558                                         { "map_buffer_range_invalidate_range",  "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE,  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,    true    },
6559                                         { "map_buffer_range_invalidate_buffer", "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE,  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,   false   },
6560                                 };
6561
6562                                 tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
6563                                 renderReferenceGroup->addChild(drawUploadDrawGroup);
6564
6565                                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6566                                 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6567                                 {
6568                                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6569
6570                                         if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6571                                                 continue;
6572
6573                                         drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6574                                                                                                                                                                 name.c_str(),
6575                                                                                                                                                                 uploadTargets[uploadTargetNdx].description,
6576                                                                                                                                                                 uploadTargets[uploadTargetNdx].drawMethod,
6577                                                                                                                                                                 uploadMethods[uploadMethodNdx].mapFlags,
6578                                                                                                                                                                 uploadTargets[uploadTargetNdx].targetBuffer,
6579                                                                                                                                                                 uploadMethods[uploadMethodNdx].uploadMethod,
6580                                                                                                                                                                 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6581                                                                                                                                                                 BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
6582                                 }
6583                         }
6584                 }
6585
6586                 // .upload_unrelated_and_draw
6587                 {
6588                         static const struct
6589                         {
6590                                 const char*             name;
6591                                 const char*             description;
6592                                 DrawMethod              drawMethod;
6593                         } drawMethods[] =
6594                         {
6595                                 { "draw_arrays",        "drawArrays",   DRAWMETHOD_DRAW_ARRAYS          },
6596                                 { "draw_elements",      "drawElements", DRAWMETHOD_DRAW_ELEMENTS        },
6597                         };
6598
6599                         static const struct
6600                         {
6601                                 const char*             name;
6602                                 UploadMethod    uploadMethod;
6603                         } uploadMethods[] =
6604                         {
6605                                 { "buffer_data",                UPLOADMETHOD_BUFFER_DATA                },
6606                                 { "buffer_sub_data",    UPLOADMETHOD_BUFFER_SUB_DATA    },
6607                                 { "map_buffer_range",   UPLOADMETHOD_MAP_BUFFER_RANGE   },
6608                         };
6609
6610                         tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
6611                         renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
6612
6613                         for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
6614                         for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6615                         {
6616                                 const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
6617                                 const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";
6618
6619                                 // Time consumed by rendering command after an unrelated upload
6620
6621                                 uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
6622                         }
6623                 }
6624
6625                 // .upload_and_draw
6626                 {
6627                         static const struct
6628                         {
6629                                 const char*                     name;
6630                                 const char*                     description;
6631                                 BufferState                     bufferState;
6632                                 UnrelatedBufferType     unrelatedBuffer;
6633                                 bool                            supportsPartialUpload;
6634                         } bufferConfigs[] =
6635                         {
6636                                 { "used_buffer",                                                "Upload to an used buffer",                                                                                     BUFFERSTATE_EXISTING,   UNRELATEDBUFFERTYPE_NONE,       true    },
6637                                 { "new_buffer",                                                 "Upload to a new buffer",                                                                                       BUFFERSTATE_NEW,                UNRELATEDBUFFERTYPE_NONE,       false   },
6638                                 { "used_buffer_and_unrelated_upload",   "Upload to an used buffer and an unrelated buffer and then draw",       BUFFERSTATE_EXISTING,   UNRELATEDBUFFERTYPE_VERTEX,     true    },
6639                                 { "new_buffer_and_unrelated_upload",    "Upload to a new buffer and an unrelated buffer and then draw",         BUFFERSTATE_NEW,                UNRELATEDBUFFERTYPE_VERTEX,     false   },
6640                         };
6641
6642                         tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
6643                         renderAfterUploadGroup->addChild(uploadAndDrawGroup);
6644
6645                         // .used_buffer
6646                         // .new_buffer
6647                         // .used_buffer_and_unrelated_upload
6648                         // .new_buffer_and_unrelated_upload
6649                         for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
6650                         {
6651                                 static const struct
6652                                 {
6653                                         const char*             name;
6654                                         const char*             description;
6655                                         DrawMethod              drawMethod;
6656                                         TargetBuffer    targetBuffer;
6657                                         bool                    partial;
6658                                 } uploadTargets[] =
6659                                 {
6660                                         {
6661                                                 "draw_arrays_upload_vertices",
6662                                                 "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
6663                                                 DRAWMETHOD_DRAW_ARRAYS,
6664                                                 TARGETBUFFER_VERTEX,
6665                                                 false
6666                                         },
6667                                         {
6668                                                 "draw_arrays_upload_vertices_partial",
6669                                                 "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
6670                                                 DRAWMETHOD_DRAW_ARRAYS,
6671                                                 TARGETBUFFER_VERTEX,
6672                                                 true
6673                                         },
6674                                         {
6675                                                 "draw_elements_upload_vertices",
6676                                                 "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
6677                                                 DRAWMETHOD_DRAW_ELEMENTS,
6678                                                 TARGETBUFFER_VERTEX,
6679                                                 false
6680                                         },
6681                                         {
6682                                                 "draw_elements_upload_indices",
6683                                                 "Measure time consumed by index upload, drawElements, and readPixels function calls",
6684                                                 DRAWMETHOD_DRAW_ELEMENTS,
6685                                                 TARGETBUFFER_INDEX,
6686                                                 false
6687                                         },
6688                                         {
6689                                                 "draw_elements_upload_indices_partial",
6690                                                 "Measure time consumed by partial index upload, drawElements, and readPixels function calls",
6691                                                 DRAWMETHOD_DRAW_ELEMENTS,
6692                                                 TARGETBUFFER_INDEX,
6693                                                 true
6694                                         },
6695                                 };
6696                                 static const struct
6697                                 {
6698                                         const char*             name;
6699                                         const char*             description;
6700                                         UploadMethod    uploadMethod;
6701                                         bool                    supportsPartialUpload;
6702                                 } uploadMethods[] =
6703                                 {
6704                                         { "buffer_data",                "bufferData",           UPLOADMETHOD_BUFFER_DATA,               false   },
6705                                         { "buffer_sub_data",    "bufferSubData",        UPLOADMETHOD_BUFFER_SUB_DATA,   true    },
6706                                         { "map_buffer_range",   "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE,  true    },
6707                                 };
6708
6709                                 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
6710                                 uploadAndDrawGroup->addChild(group);
6711
6712                                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6713                                 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6714                                 {
6715                                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6716
6717                                         if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6718                                                 continue;
6719                                         if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
6720                                                 continue;
6721
6722                                         // Don't log unrelated buffer information to samples if there is no such buffer
6723
6724                                         if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
6725                                         {
6726                                                 typedef UploadRenderReadDuration                                SampleType;
6727                                                 typedef GenericUploadRenderTimeCase<SampleType> TestType;
6728
6729                                                 group->addChild(new TestType(m_context,
6730                                                                                                          name.c_str(),
6731                                                                                                          uploadTargets[uploadTargetNdx].description,
6732                                                                                                          uploadTargets[uploadTargetNdx].drawMethod,
6733                                                                                                          uploadTargets[uploadTargetNdx].targetBuffer,
6734                                                                                                          uploadMethods[uploadMethodNdx].uploadMethod,
6735                                                                                                          bufferConfigs[stateNdx].bufferState,
6736                                                                                                          (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6737                                                                                                          bufferConfigs[stateNdx].unrelatedBuffer));
6738                                         }
6739                                         else
6740                                         {
6741                                                 typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType;
6742                                                 typedef GenericUploadRenderTimeCase<SampleType>                 TestType;
6743
6744                                                 group->addChild(new TestType(m_context,
6745                                                                                                          name.c_str(),
6746                                                                                                          uploadTargets[uploadTargetNdx].description,
6747                                                                                                          uploadTargets[uploadTargetNdx].drawMethod,
6748                                                                                                          uploadTargets[uploadTargetNdx].targetBuffer,
6749                                                                                                          uploadMethods[uploadMethodNdx].uploadMethod,
6750                                                                                                          bufferConfigs[stateNdx].bufferState,
6751                                                                                                          (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6752                                                                                                          bufferConfigs[stateNdx].unrelatedBuffer));
6753                                         }
6754                                 }
6755                         }
6756                 }
6757
6758                 // .draw_modify_draw
6759                 {
6760                         static const struct
6761                         {
6762                                 const char*             name;
6763                                 const char*             description;
6764                                 DrawMethod              drawMethod;
6765                                 TargetBuffer    targetBuffer;
6766                                 bool                    partial;
6767                         } uploadTargets[] =
6768                         {
6769                                 {
6770                                         "draw_arrays_upload_vertices",
6771                                         "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6772                                         DRAWMETHOD_DRAW_ARRAYS,
6773                                         TARGETBUFFER_VERTEX,
6774                                         false
6775                                 },
6776                                 {
6777                                         "draw_arrays_upload_vertices_partial",
6778                                         "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6779                                         DRAWMETHOD_DRAW_ARRAYS,
6780                                         TARGETBUFFER_VERTEX,
6781                                         true
6782                                 },
6783                                 {
6784                                         "draw_elements_upload_vertices",
6785                                         "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6786                                         DRAWMETHOD_DRAW_ELEMENTS,
6787                                         TARGETBUFFER_VERTEX,
6788                                         false
6789                                 },
6790                                 {
6791                                         "draw_elements_upload_indices",
6792                                         "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6793                                         DRAWMETHOD_DRAW_ELEMENTS,
6794                                         TARGETBUFFER_INDEX,
6795                                         false
6796                                 },
6797                                 {
6798                                         "draw_elements_upload_indices_partial",
6799                                         "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6800                                         DRAWMETHOD_DRAW_ELEMENTS,
6801                                         TARGETBUFFER_INDEX,
6802                                         true
6803                                 },
6804                         };
6805                         static const struct
6806                         {
6807                                 const char*                                                     name;
6808                                 const char*                                                     description;
6809                                 UploadMethod                                            uploadMethod;
6810                                 BufferInUseRenderTimeCase::MapFlags     mapFlags;
6811                                 bool                                                            supportsPartialUpload;
6812                         } uploadMethods[] =
6813                         {
6814                                 { "buffer_data",                                                "bufferData",           UPLOADMETHOD_BUFFER_DATA,               BufferInUseRenderTimeCase::MAPFLAG_NONE,                                false   },
6815                                 { "buffer_sub_data",                                    "bufferSubData",        UPLOADMETHOD_BUFFER_SUB_DATA,   BufferInUseRenderTimeCase::MAPFLAG_NONE,                                true    },
6816                                 { "map_buffer_range_invalidate_range",  "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE,  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,    true    },
6817                                 { "map_buffer_range_invalidate_buffer", "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE,  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,   false   },
6818                         };
6819
6820                         tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
6821                         renderAfterUploadGroup->addChild(drawModifyDrawGroup);
6822
6823                         for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6824                         for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6825                         {
6826                                 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6827
6828                                 if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6829                                         continue;
6830
6831                                 drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6832                                                                                                                                                         name.c_str(),
6833                                                                                                                                                         uploadTargets[uploadTargetNdx].description,
6834                                                                                                                                                         uploadTargets[uploadTargetNdx].drawMethod,
6835                                                                                                                                                         uploadMethods[uploadMethodNdx].mapFlags,
6836                                                                                                                                                         uploadTargets[uploadTargetNdx].targetBuffer,
6837                                                                                                                                                         uploadMethods[uploadMethodNdx].uploadMethod,
6838                                                                                                                                                         (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6839                                                                                                                                                         BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
6840                         }
6841                 }
6842
6843                 // .upload_wait_draw
6844                 {
6845                         static const struct
6846                         {
6847                                 const char*     name;
6848                                 const char*     description;
6849                                 BufferState     bufferState;
6850                         } bufferStates[] =
6851                         {
6852                                 { "new_buffer",         "Uploading to just generated name",     BUFFERSTATE_NEW                 },
6853                                 { "used_buffer",        "Uploading to a used buffer",           BUFFERSTATE_EXISTING    },
6854                         };
6855                         static const struct
6856                         {
6857                                 const char*             name;
6858                                 const char*             description;
6859                                 DrawMethod              drawMethod;
6860                                 TargetBuffer    targetBuffer;
6861                         } uploadTargets[] =
6862                         {
6863                                 { "draw_arrays_vertices",       "Upload vertex data, draw with drawArrays",             DRAWMETHOD_DRAW_ARRAYS,         TARGETBUFFER_VERTEX     },
6864                                 { "draw_elements_vertices",     "Upload vertex data, draw with drawElements",   DRAWMETHOD_DRAW_ELEMENTS,       TARGETBUFFER_VERTEX     },
6865                                 { "draw_elements_indices",      "Upload index data, draw with drawElements",    DRAWMETHOD_DRAW_ELEMENTS,       TARGETBUFFER_INDEX      },
6866                         };
6867                         static const struct
6868                         {
6869                                 const char*             name;
6870                                 const char*             description;
6871                                 UploadMethod    uploadMethod;
6872                         } uploadMethods[] =
6873                         {
6874                                 { "buffer_data",                "bufferData",           UPLOADMETHOD_BUFFER_DATA                },
6875                                 { "buffer_sub_data",    "bufferSubData",        UPLOADMETHOD_BUFFER_SUB_DATA    },
6876                                 { "map_buffer_range",   "mapBufferRange",       UPLOADMETHOD_MAP_BUFFER_RANGE   },
6877                         };
6878
6879                         tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
6880                         renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
6881
6882                         for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
6883                         {
6884                                 tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
6885                                 uploadSwapDrawGroup->addChild(bufferGroup);
6886
6887                                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6888                                 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6889                                 {
6890                                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6891
6892                                         bufferGroup->addChild(new UploadWaitDrawCase(m_context,
6893                                                                                                                                  name.c_str(),
6894                                                                                                                                  uploadTargets[uploadTargetNdx].description,
6895                                                                                                                                  uploadTargets[uploadTargetNdx].drawMethod,
6896                                                                                                                                  uploadTargets[uploadTargetNdx].targetBuffer,
6897                                                                                                                                  uploadMethods[uploadMethodNdx].uploadMethod,
6898                                                                                                                                  bufferStates[bufferStateNdx].bufferState));
6899                                 }
6900                         }
6901                 }
6902         }
6903 }
6904
6905 } // Performance
6906 } // gles3
6907 } // deqp