Add CTS_ARB_gl_spirv test implementation
[platform/upstream/VK-GL-CTS.git] / framework / referencerenderer / rrRenderer.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference renderer interface.
22  *//*--------------------------------------------------------------------*/
23
24 #include "rrRenderer.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include "tcuFloat.hpp"
28 #include "rrPrimitiveAssembler.hpp"
29 #include "rrFragmentOperations.hpp"
30 #include "rrRasterizer.hpp"
31 #include "deMemory.h"
32
33 #include <set>
34
35 namespace rr
36 {
37 namespace
38 {
39
//! Floating point type used in clipping. The clipping helpers in this file
//! take and return this type; positions are converted to and from float
//! by vec4ToClipVec4() / clipVec4ToVec4().
typedef double ClipFloat; // floating point type used in clipping

//! Four-component homogeneous position expressed in ClipFloat precision.
typedef tcu::Vector<ClipFloat, 4> ClipVec4;
43
//! Bundle of scratch buffers used by rasterization.
//! NOTE(review): how each buffer is sized and who owns fragmentDepthBuffer is
//! not visible in this part of the file - confirm against the code that
//! fills this struct in.
struct RasterizationInternalBuffers
{
        std::vector<FragmentPacket>             fragmentPackets;        //!< fragment packet storage
        std::vector<GenericVec4>                shaderOutputs;          //!< shader output value storage
        std::vector<Fragment>                   shadedFragments;        //!< shaded fragment storage
        float*                                                  fragmentDepthBuffer;    //!< raw float pointer; not initialized by this struct
};
51
52 deUint32 readIndexArray (const IndexType type, const void* ptr, size_t ndx)
53 {
54         switch (type)
55         {
56                 case INDEXTYPE_UINT8:
57                         return ((const deUint8*)ptr)[ndx];
58
59                 case INDEXTYPE_UINT16:
60                 {
61                         deUint16 retVal;
62                         deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint16), sizeof(deUint16));
63
64                         return retVal;
65                 }
66
67                 case INDEXTYPE_UINT32:
68                 {
69                         deUint32 retVal;
70                         deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint32), sizeof(deUint32));
71
72                         return retVal;
73                 }
74
75                 default:
76                         DE_ASSERT(false);
77                         return 0;
78         }
79 }
80
81 tcu::IVec4 getBufferSize (const rr::MultisampleConstPixelBufferAccess& multisampleBuffer)
82 {
83         return tcu::IVec4(0, 0, multisampleBuffer.raw().getHeight(), multisampleBuffer.raw().getDepth());
84 }
85
86 bool isEmpty (const rr::MultisampleConstPixelBufferAccess& access)
87 {
88         return access.raw().getWidth() == 0 || access.raw().getHeight() == 0 || access.raw().getDepth() == 0;
89 }
90
//! Per-draw state. Holds a primitive id counter that starts at zero.
struct DrawContext
{
        int primitiveID;        //!< zero after construction

        DrawContext (void)
        {
                primitiveID = 0;
        }
};
100
101 /*--------------------------------------------------------------------*//*!
102  * \brief Calculates intersection of two rects given as (left, bottom, width, height)
103  *//*--------------------------------------------------------------------*/
104 tcu::IVec4 rectIntersection (const tcu::IVec4& a, const tcu::IVec4& b)
105 {
106         const tcu::IVec2 pos    = tcu::IVec2(de::max(a.x(), b.x()), de::max(a.y(), b.y()));
107         const tcu::IVec2 endPos = tcu::IVec2(de::min(a.x() + a.z(), b.x() + b.z()), de::min(a.y() + a.w(), b.y() + b.w()));
108
109         return tcu::IVec4(pos.x(), pos.y(), endPos.x() - pos.x(), endPos.y() - pos.y());
110 }
111
112 void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::Triangle>& input)
113 {
114         std::swap(output, input);
115 }
116
117 void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::Line>& input)
118 {
119         std::swap(output, input);
120 }
121
122 void convertPrimitiveToBaseType(std::vector<pa::Point>& output, std::vector<pa::Point>& input)
123 {
124         std::swap(output, input);
125 }
126
127 void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::LineAdjacency>& input)
128 {
129         output.resize(input.size());
130         for (size_t i = 0; i < input.size(); ++i)
131         {
132                 const int adjacentProvokingVertex       = input[i].provokingIndex;
133                 const int baseProvokingVertexIndex      = adjacentProvokingVertex-1;
134                 output[i] = pa::Line(input[i].v1, input[i].v2, baseProvokingVertexIndex);
135         }
136 }
137
138 void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::TriangleAdjacency>& input)
139 {
140         output.resize(input.size());
141         for (size_t i = 0; i < input.size(); ++i)
142         {
143                 const int adjacentProvokingVertex       = input[i].provokingIndex;
144                 const int baseProvokingVertexIndex      = adjacentProvokingVertex/2;
145                 output[i] = pa::Triangle(input[i].v0, input[i].v2, input[i].v4, baseProvokingVertexIndex);
146         }
147 }
148
149 namespace cliputil
150 {
151
152 /*--------------------------------------------------------------------*//*!
153  * \brief Get clipped portion of the second endpoint
154  *
155  * Calculate the intersection of line segment v0-v1 and a given plane. Line
156  * segment is defined by a pair of one-dimensional homogeneous coordinates.
157  *
158  *//*--------------------------------------------------------------------*/
159 ClipFloat getSegmentVolumeEdgeClip (const ClipFloat v0,
160                                                                         const ClipFloat w0,
161                                                                         const ClipFloat v1,
162                                                                         const ClipFloat w1,
163                                                                         const ClipFloat plane)
164 {
165         return (plane*w0 - v0) / ((v1 - v0) - plane*(w1 - w0));
166 }
167
168 /*--------------------------------------------------------------------*//*!
169  * \brief Get clipped portion of the endpoint
170  *
171  * How much (in [0-1] range) of a line segment v0-v1 would be clipped
172  * of the v0 end of the line segment by clipping.
173  *//*--------------------------------------------------------------------*/
174 ClipFloat getLineEndpointClipping (const ClipVec4& v0, const ClipVec4& v1)
175 {
176         const ClipFloat clipVolumeSize = (ClipFloat)1.0;
177
178         if (v0.z() > v0.w())
179         {
180                 // Clip +Z
181                 return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), clipVolumeSize);
182         }
183         else if (v0.z() < -v0.w())
184         {
185                 // Clip -Z
186                 return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), -clipVolumeSize);
187         }
188         else
189         {
190                 // no clipping
191                 return (ClipFloat)0.0;
192         }
193 }
194
195 ClipVec4 vec4ToClipVec4 (const tcu::Vec4& v)
196 {
197         return ClipVec4((ClipFloat)v.x(), (ClipFloat)v.y(), (ClipFloat)v.z(), (ClipFloat)v.w());
198 }
199
200 tcu::Vec4 clipVec4ToVec4 (const ClipVec4& v)
201 {
202         return tcu::Vec4((float)v.x(), (float)v.y(), (float)v.z(), (float)v.w());
203 }
204
205 class ClipVolumePlane
206 {
207 public:
208         virtual bool            pointInClipVolume                       (const ClipVec4& p) const                                               = 0;
209         virtual ClipFloat       clipLineSegmentEnd                      (const ClipVec4& v0, const ClipVec4& v1) const  = 0;
210         virtual ClipVec4        getLineIntersectionPoint        (const ClipVec4& v0, const ClipVec4& v1) const  = 0;
211 };
212
213 template <int Sign, int CompNdx>
214 class ComponentPlane : public ClipVolumePlane
215 {
216         DE_STATIC_ASSERT(Sign == +1 || Sign == -1);
217
218 public:
219         bool            pointInClipVolume                       (const ClipVec4& p) const;
220         ClipFloat       clipLineSegmentEnd                      (const ClipVec4& v0, const ClipVec4& v1) const;
221         ClipVec4        getLineIntersectionPoint        (const ClipVec4& v0, const ClipVec4& v1) const;
222 };
223
224 template <int Sign, int CompNdx>
225 bool ComponentPlane<Sign, CompNdx>::pointInClipVolume (const ClipVec4& p) const
226 {
227         const ClipFloat clipVolumeSize = (ClipFloat)1.0;
228
229         return (ClipFloat)(Sign * p[CompNdx]) <= clipVolumeSize * p.w();
230 }
231
232 template <int Sign, int CompNdx>
233 ClipFloat ComponentPlane<Sign, CompNdx>::clipLineSegmentEnd (const ClipVec4& v0, const ClipVec4& v1) const
234 {
235         const ClipFloat clipVolumeSize = (ClipFloat)1.0;
236
237         return getSegmentVolumeEdgeClip(v0[CompNdx], v0.w(),
238                                                                         v1[CompNdx], v1.w(),
239                                                                         (ClipFloat)Sign * clipVolumeSize);
240 }
241
242 template <int Sign, int CompNdx>
243 ClipVec4 ComponentPlane<Sign, CompNdx>::getLineIntersectionPoint (const ClipVec4& v0, const ClipVec4& v1) const
244 {
245         // A point on line might be far away, causing clipping ratio (clipLineSegmentEnd) to become extremely close to 1.0
246         // even if the another point is not on the plane. Prevent clipping ratio from saturating by using points on line
247         // that are (nearly) on this and (nearly) on the opposite plane.
248
249         const ClipVec4  clippedV0       = tcu::mix(v0, v1, ComponentPlane<+1, CompNdx>().clipLineSegmentEnd(v0, v1));
250         const ClipVec4  clippedV1       = tcu::mix(v0, v1, ComponentPlane<-1, CompNdx>().clipLineSegmentEnd(v0, v1));
251         const ClipFloat clipRatio       = clipLineSegmentEnd(clippedV0, clippedV1);
252
253         // Find intersection point of line from v0 to v1 and the current plane. Avoid ratios near 1.0
254         if (clipRatio <= (ClipFloat)0.5)
255                 return tcu::mix(clippedV0, clippedV1, clipRatio);
256         else
257         {
258                 const ClipFloat complementClipRatio = clipLineSegmentEnd(clippedV1, clippedV0);
259                 return tcu::mix(clippedV1, clippedV0, complementClipRatio);
260         }
261 }
262
//! Vertex of a triangle being clipped: a position in clipping precision plus
//! the weights of the three original triangle vertices at that position.
struct TriangleVertex
{
        ClipVec4        position;               //!< homogeneous position in ClipFloat precision
        ClipFloat       weight[3];              //!< barycentrics
};
268
//! Triangle made of three TriangleVertex entries; the unit the triangle
//! clipper passes from one clip plane to the next.
struct SubTriangle
{
        TriangleVertex vertices[3];
};
273
274 void clipTriangleOneVertex (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& clipped, const TriangleVertex& v1, const TriangleVertex& v2)
275 {
276         const ClipFloat degenerateLimit = (ClipFloat)1.0;
277
278         // calc clip pos
279         TriangleVertex  mid1;
280         TriangleVertex  mid2;
281         bool                    outputDegenerate = false;
282
283         {
284                 const TriangleVertex&   inside  = v1;
285                 const TriangleVertex&   outside = clipped;
286                       TriangleVertex&   middle  = mid1;
287
288                 const ClipFloat                 hitDist = plane.clipLineSegmentEnd(inside.position, outside.position);
289
290                 if (hitDist >= degenerateLimit)
291                 {
292                         // do not generate degenerate triangles
293                         outputDegenerate = true;
294                 }
295                 else
296                 {
297                         const ClipVec4 approximatedClipPoint    = tcu::mix(inside.position, outside.position, hitDist);
298                         const ClipVec4 anotherPointOnLine               = (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
299
300                         middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
301                         middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
302                         middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
303                         middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
304                 }
305         }
306
307         {
308                 const TriangleVertex&   inside  = v2;
309                 const TriangleVertex&   outside = clipped;
310                       TriangleVertex&   middle  = mid2;
311
312                 const ClipFloat                 hitDist = plane.clipLineSegmentEnd(inside.position, outside.position);
313
314                 if (hitDist >= degenerateLimit)
315                 {
316                         // do not generate degenerate triangles
317                         outputDegenerate = true;
318                 }
319                 else
320                 {
321                         const ClipVec4 approximatedClipPoint    = tcu::mix(inside.position, outside.position, hitDist);
322                         const ClipVec4 anotherPointOnLine               = (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
323
324                         middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
325                         middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
326                         middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
327                         middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
328                 }
329         }
330
331         if (!outputDegenerate)
332         {
333                 // gen quad (v1) -> mid1 -> mid2 -> (v2)
334                 clippedEdges.push_back(v1);
335                 clippedEdges.push_back(mid1);
336                 clippedEdges.push_back(mid2);
337                 clippedEdges.push_back(v2);
338         }
339         else
340         {
341                 // don't modify
342                 clippedEdges.push_back(v1);
343                 clippedEdges.push_back(clipped);
344                 clippedEdges.push_back(v2);
345         }
346 }
347
348 void clipTriangleTwoVertices (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& v0, const TriangleVertex& clipped1, const TriangleVertex& clipped2)
349 {
350         const ClipFloat unclippableLimit = (ClipFloat)1.0;
351
352         // calc clip pos
353         TriangleVertex  mid1;
354         TriangleVertex  mid2;
355         bool                    unclippableVertex1 = false;
356         bool                    unclippableVertex2 = false;
357
358         {
359                 const TriangleVertex&   inside  = v0;
360                 const TriangleVertex&   outside = clipped1;
361                       TriangleVertex&   middle  = mid1;
362
363                 const ClipFloat                 hitDist = plane.clipLineSegmentEnd(inside.position, outside.position);
364
365                 if (hitDist >= unclippableLimit)
366                 {
367                         // this edge cannot be clipped because the edge is really close to the volume boundary
368                         unclippableVertex1 = true;
369                 }
370                 else
371                 {
372                         const ClipVec4 approximatedClipPoint    = tcu::mix(inside.position, outside.position, hitDist);
373                         const ClipVec4 anotherPointOnLine               = (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
374
375                         middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
376                         middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
377                         middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
378                         middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
379                 }
380         }
381
382         {
383                 const TriangleVertex&   inside  = v0;
384                 const TriangleVertex&   outside = clipped2;
385                       TriangleVertex&   middle  = mid2;
386
387                 const ClipFloat                 hitDist = plane.clipLineSegmentEnd(inside.position, outside.position);
388
389                 if (hitDist >= unclippableLimit)
390                 {
391                         // this edge cannot be clipped because the edge is really close to the volume boundary
392                         unclippableVertex2 = true;
393                 }
394                 else
395                 {
396                         const ClipVec4 approximatedClipPoint    = tcu::mix(inside.position, outside.position, hitDist);
397                         const ClipVec4 anotherPointOnLine               = (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
398
399                         middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
400                         middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
401                         middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
402                         middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
403                 }
404         }
405
406         if (!unclippableVertex1 && !unclippableVertex2)
407         {
408                 // gen triangle (v0) -> mid1 -> mid2
409                 clippedEdges.push_back(v0);
410                 clippedEdges.push_back(mid1);
411                 clippedEdges.push_back(mid2);
412         }
413         else if (!unclippableVertex1 && unclippableVertex2)
414         {
415                 // clip just vertex 1
416                 clippedEdges.push_back(v0);
417                 clippedEdges.push_back(mid1);
418                 clippedEdges.push_back(clipped2);
419         }
420         else if (unclippableVertex1 && !unclippableVertex2)
421         {
422                 // clip just vertex 2
423                 clippedEdges.push_back(v0);
424                 clippedEdges.push_back(clipped1);
425                 clippedEdges.push_back(mid2);
426         }
427         else
428         {
429                 // don't modify
430                 clippedEdges.push_back(v0);
431                 clippedEdges.push_back(clipped1);
432                 clippedEdges.push_back(clipped2);
433         }
434 }
435
436 void clipTriangleToPlane (std::vector<TriangleVertex>& clippedEdges, const TriangleVertex* vertices, const ClipVolumePlane& plane)
437 {
438         const bool v0Clipped = !plane.pointInClipVolume(vertices[0].position);
439         const bool v1Clipped = !plane.pointInClipVolume(vertices[1].position);
440         const bool v2Clipped = !plane.pointInClipVolume(vertices[2].position);
441         const int  clipCount = ((v0Clipped) ? (1) : (0)) + ((v1Clipped) ? (1) : (0)) + ((v2Clipped) ? (1) : (0));
442
443         if (clipCount == 0)
444         {
445                 // pass
446                 clippedEdges.insert(clippedEdges.begin(), vertices, vertices + 3);
447         }
448         else if (clipCount == 1)
449         {
450                 // clip one vertex
451                 if (v0Clipped)                  clipTriangleOneVertex(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
452                 else if (v1Clipped)             clipTriangleOneVertex(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
453                 else                                    clipTriangleOneVertex(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
454         }
455         else if (clipCount == 2)
456         {
457                 // clip two vertices
458                 if (!v0Clipped)                 clipTriangleTwoVertices(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
459                 else if (!v1Clipped)    clipTriangleTwoVertices(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
460                 else                                    clipTriangleTwoVertices(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
461         }
462         else if (clipCount == 3)
463         {
464                 // discard
465         }
466         else
467         {
468                 DE_ASSERT(DE_FALSE);
469         }
470 }
471
472 } // cliputil
473
474 tcu::Vec2 to2DCartesian (const tcu::Vec4& p)
475 {
476         return tcu::Vec2(p.x(), p.y()) / p.w();
477 }
478
479 float cross2D (const tcu::Vec2& a, const tcu::Vec2& b)
480 {
481         return tcu::cross(tcu::Vec3(a.x(), a.y(), 0.0f), tcu::Vec3(b.x(), b.y(), 0.0f)).z();
482 }
483
484 void flatshadePrimitiveVertices (pa::Triangle& target, size_t outputNdx)
485 {
486         const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
487         target.v0->outputs[outputNdx] = flatValue;
488         target.v1->outputs[outputNdx] = flatValue;
489         target.v2->outputs[outputNdx] = flatValue;
490 }
491
492 void flatshadePrimitiveVertices (pa::Line& target, size_t outputNdx)
493 {
494         const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
495         target.v0->outputs[outputNdx] = flatValue;
496         target.v1->outputs[outputNdx] = flatValue;
497 }
498
499 void flatshadePrimitiveVertices (pa::Point& target, size_t outputNdx)
500 {
501         DE_UNREF(target);
502         DE_UNREF(outputNdx);
503 }
504
505 template <typename ContainerType>
506 void flatshadeVertices (const Program& program, ContainerType& list)
507 {
508         // flatshade
509         const std::vector<rr::VertexVaryingInfo>& fragInputs = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
510
511         for (size_t inputNdx = 0; inputNdx < fragInputs.size(); ++inputNdx)
512                 if (fragInputs[inputNdx].flatshade)
513                         for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
514                                 flatshadePrimitiveVertices(*it, inputNdx);
515 }
516
517 /*--------------------------------------------------------------------*//*!
518  * Clip triangles to the clip volume.
519  *//*--------------------------------------------------------------------*/
520 void clipPrimitives (std::vector<pa::Triangle>&         list,
521                                          const Program&                                 program,
522                                          bool                                                   clipWithZPlanes,
523                                          VertexPacketAllocator&                 vpalloc)
524 {
525         using namespace cliputil;
526
527         cliputil::ComponentPlane<+1, 0> clipPosX;
528         cliputil::ComponentPlane<-1, 0> clipNegX;
529         cliputil::ComponentPlane<+1, 1> clipPosY;
530         cliputil::ComponentPlane<-1, 1> clipNegY;
531         cliputil::ComponentPlane<+1, 2> clipPosZ;
532         cliputil::ComponentPlane<-1, 2> clipNegZ;
533
534         const std::vector<rr::VertexVaryingInfo>&       fragInputs                      = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
535         const ClipVolumePlane*                                          planes[]                        = { &clipPosX, &clipNegX, &clipPosY, &clipNegY, &clipPosZ, &clipNegZ };
536         const int                                                                       numPlanes                       = (clipWithZPlanes) ? (6) : (4);
537
538         std::vector<pa::Triangle>                                       outputTriangles;
539
540         for (int inputTriangleNdx = 0; inputTriangleNdx < (int)list.size(); ++inputTriangleNdx)
541         {
542                 bool clippedByPlane[6];
543
544                 // Needs clipping?
545                 {
546                         bool discardPrimitive   = false;
547                         bool fullyInClipVolume  = true;
548
549                         for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
550                         {
551                                 const ClipVolumePlane*  plane                   = planes[planeNdx];
552                                 const bool                              v0InsidePlane   = plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v0->position));
553                                 const bool                              v1InsidePlane   = plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v1->position));
554                                 const bool                              v2InsidePlane   = plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v2->position));
555
556                                 // Fully outside
557                                 if (!v0InsidePlane && !v1InsidePlane && !v2InsidePlane)
558                                 {
559                                         discardPrimitive = true;
560                                         break;
561                                 }
562                                 // Partially outside
563                                 else if (!v0InsidePlane || !v1InsidePlane || !v2InsidePlane)
564                                 {
565                                         clippedByPlane[planeNdx] = true;
566                                         fullyInClipVolume = false;
567                                 }
568                                 // Fully inside
569                                 else
570                                         clippedByPlane[planeNdx] = false;
571                         }
572
573                         if (discardPrimitive)
574                                 continue;
575
576                         if (fullyInClipVolume)
577                         {
578                                 outputTriangles.push_back(list[inputTriangleNdx]);
579                                 continue;
580                         }
581                 }
582
583                 // Clip
584                 {
585                         std::vector<SubTriangle>        subTriangles    (1);
586                         SubTriangle&                            initialTri              = subTriangles[0];
587
588                         initialTri.vertices[0].position = vec4ToClipVec4(list[inputTriangleNdx].v0->position);
589                         initialTri.vertices[0].weight[0] = (ClipFloat)1.0;
590                         initialTri.vertices[0].weight[1] = (ClipFloat)0.0;
591                         initialTri.vertices[0].weight[2] = (ClipFloat)0.0;
592
593                         initialTri.vertices[1].position = vec4ToClipVec4(list[inputTriangleNdx].v1->position);
594                         initialTri.vertices[1].weight[0] = (ClipFloat)0.0;
595                         initialTri.vertices[1].weight[1] = (ClipFloat)1.0;
596                         initialTri.vertices[1].weight[2] = (ClipFloat)0.0;
597
598                         initialTri.vertices[2].position = vec4ToClipVec4(list[inputTriangleNdx].v2->position);
599                         initialTri.vertices[2].weight[0] = (ClipFloat)0.0;
600                         initialTri.vertices[2].weight[1] = (ClipFloat)0.0;
601                         initialTri.vertices[2].weight[2] = (ClipFloat)1.0;
602
603                         // Clip all subtriangles to all relevant planes
604                         for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
605                         {
606                                 std::vector<SubTriangle> nextPhaseSubTriangles;
607
608                                 if (!clippedByPlane[planeNdx])
609                                         continue;
610
611                                 for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
612                                 {
613                                         std::vector<TriangleVertex> convexPrimitive;
614
615                                         // Clip triangle and form a convex n-gon ( n c {3, 4} )
616                                         clipTriangleToPlane(convexPrimitive, subTriangles[subTriangleNdx].vertices, *planes[planeNdx]);
617
618                                         // Subtriangle completely discarded
619                                         if (convexPrimitive.empty())
620                                                 continue;
621
622                                         DE_ASSERT(convexPrimitive.size() == 3 || convexPrimitive.size() == 4);
623
624                                         //Triangulate planar convex n-gon
625                                         {
626                                                 TriangleVertex& v0 = convexPrimitive[0];
627
628                                                 for (int subsubTriangleNdx = 1; subsubTriangleNdx + 1 < (int)convexPrimitive.size(); ++subsubTriangleNdx)
629                                                 {
630                                                         const float                             degenerateEpsilon       = 1.0e-6f;
631                                                         const TriangleVertex&   v1                                      = convexPrimitive[subsubTriangleNdx];
632                                                         const TriangleVertex&   v2                                      = convexPrimitive[subsubTriangleNdx + 1];
633                                                         const float                             visibleArea                     = de::abs(cross2D(to2DCartesian(clipVec4ToVec4(v1.position)) - to2DCartesian(clipVec4ToVec4(v0.position)),
634                                                                                                                                                                                   to2DCartesian(clipVec4ToVec4(v2.position)) - to2DCartesian(clipVec4ToVec4(v0.position))));
635
636                                                         // has surface area (is not a degenerate)
637                                                         if (visibleArea >= degenerateEpsilon)
638                                                         {
639                                                                 SubTriangle subsubTriangle;
640
641                                                                 subsubTriangle.vertices[0] = v0;
642                                                                 subsubTriangle.vertices[1] = v1;
643                                                                 subsubTriangle.vertices[2] = v2;
644
645                                                                 nextPhaseSubTriangles.push_back(subsubTriangle);
646                                                         }
647                                                 }
648                                         }
649                                 }
650
651                                 subTriangles.swap(nextPhaseSubTriangles);
652                         }
653
654                         // Rebuild pa::Triangles from subtriangles
655                         for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
656                         {
657                                 VertexPacket*   p0                              = vpalloc.alloc();
658                                 VertexPacket*   p1                              = vpalloc.alloc();
659                                 VertexPacket*   p2                              = vpalloc.alloc();
660                                 pa::Triangle    ngonFragment    (p0, p1, p2, -1);
661
662                                 p0->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[0].position);
663                                 p1->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[1].position);
664                                 p2->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[2].position);
665
666                                 for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
667                                 {
668                                         if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
669                                         {
670                                                 const tcu::Vec4 out0 = list[inputTriangleNdx].v0->outputs[outputNdx].get<float>();
671                                                 const tcu::Vec4 out1 = list[inputTriangleNdx].v1->outputs[outputNdx].get<float>();
672                                                 const tcu::Vec4 out2 = list[inputTriangleNdx].v2->outputs[outputNdx].get<float>();
673
674                                                 p0->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[0].weight[0] * out0
675                                                                                            + (float)subTriangles[subTriangleNdx].vertices[0].weight[1] * out1
676                                                                                            + (float)subTriangles[subTriangleNdx].vertices[0].weight[2] * out2;
677
678                                                 p1->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[1].weight[0] * out0
679                                                                                            + (float)subTriangles[subTriangleNdx].vertices[1].weight[1] * out1
680                                                                                            + (float)subTriangles[subTriangleNdx].vertices[1].weight[2] * out2;
681
682                                                 p2->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[2].weight[0] * out0
683                                                                                            + (float)subTriangles[subTriangleNdx].vertices[2].weight[1] * out1
684                                                                                            + (float)subTriangles[subTriangleNdx].vertices[2].weight[2] * out2;
685                                         }
686                                         else
687                                         {
688                                                 // only floats are interpolated, all others must be flatshaded then
689                                                 p0->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
690                                                 p1->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
691                                                 p2->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
692                                         }
693                                 }
694
695                                 outputTriangles.push_back(ngonFragment);
696                         }
697                 }
698         }
699
700         // output result
701         list.swap(outputTriangles);
702 }
703
704 /*--------------------------------------------------------------------*//*!
705  * Clip lines to the near and far clip planes.
706  *
707  * Clipping to other planes is a by-product of the viewport test  (i.e.
708  * rasterization area selection).
709  *//*--------------------------------------------------------------------*/
710 void clipPrimitives (std::vector<pa::Line>&                     list,
711                                          const Program&                                 program,
712                                          bool                                                   clipWithZPlanes,
713                                          VertexPacketAllocator&                 vpalloc)
714 {
715         DE_UNREF(vpalloc);
716
717         using namespace cliputil;
718
719         // Lines are clipped only by the far and the near planes here. Line clipping by other planes done in the rasterization phase
720
721         const std::vector<rr::VertexVaryingInfo>&       fragInputs      = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
722         std::vector<pa::Line>                                           visibleLines;
723
724         // Z-clipping disabled, don't do anything
725         if (!clipWithZPlanes)
726                 return;
727
728         for (size_t ndx = 0; ndx < list.size(); ++ndx)
729         {
730                 pa::Line& l = list[ndx];
731
732                 // Totally discarded?
733                 if ((l.v0->position.z() < -l.v0->position.w() && l.v1->position.z() < -l.v1->position.w()) ||
734                         (l.v0->position.z() >  l.v0->position.w() && l.v1->position.z() >  l.v1->position.w()))
735                         continue; // discard
736
737                 // Something is visible
738
739                 const ClipVec4  p0      = vec4ToClipVec4(l.v0->position);
740                 const ClipVec4  p1      = vec4ToClipVec4(l.v1->position);
741                 const ClipFloat t0      = getLineEndpointClipping(p0, p1);
742                 const ClipFloat t1      = getLineEndpointClipping(p1, p0);
743
744                 // Not clipped at all?
745                 if (t0 == (ClipFloat)0.0 && t1 == (ClipFloat)0.0)
746                 {
747                         visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
748                 }
749                 else
750                 {
751                         // Clip position
752                         l.v0->position = clipVec4ToVec4(tcu::mix(p0, p1, t0));
753                         l.v1->position = clipVec4ToVec4(tcu::mix(p1, p0, t1));
754
755                         // Clip attributes
756                         for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
757                         {
758                                 // only floats are clipped, other types are flatshaded
759                                 if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
760                                 {
761                                         const tcu::Vec4 a0 = l.v0->outputs[outputNdx].get<float>();
762                                         const tcu::Vec4 a1 = l.v1->outputs[outputNdx].get<float>();
763
764                                         l.v0->outputs[outputNdx] = tcu::mix(a0, a1, (float)t0);
765                                         l.v1->outputs[outputNdx] = tcu::mix(a1, a0, (float)t1);
766                                 }
767                         }
768
769                         visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
770                 }
771         }
772
773         // return visible in list
774         std::swap(visibleLines, list);
775 }
776
777 /*--------------------------------------------------------------------*//*!
778  * Discard points not within clip volume. Clipping is a by-product
779  * of the viewport test.
780  *//*--------------------------------------------------------------------*/
781 void clipPrimitives (std::vector<pa::Point>&            list,
782                                          const Program&                                 program,
783                                          bool                                                   clipWithZPlanes,
784                                          VertexPacketAllocator&                 vpalloc)
785 {
786         DE_UNREF(vpalloc);
787         DE_UNREF(program);
788
789         std::vector<pa::Point> visiblePoints;
790
791         // Z-clipping disabled, don't do anything
792         if (!clipWithZPlanes)
793                 return;
794
795         for (size_t ndx = 0; ndx < list.size(); ++ndx)
796         {
797                 pa::Point& p = list[ndx];
798
799                 // points are discarded if Z is not in range. (Wide) point clipping is done in the rasterization phase
800                 if (de::inRange(p.v0->position.z(), -p.v0->position.w(), p.v0->position.w()))
801                         visiblePoints.push_back(pa::Point(p.v0));
802         }
803
804         // return visible in list
805         std::swap(visiblePoints, list);
806 }
807
808 void transformVertexClipCoordsToWindowCoords (const RenderState& state, VertexPacket& packet)
809 {
810         // To normalized device coords
811         {
812                 packet.position = tcu::Vec4(packet.position.x()/packet.position.w(),
813                                                                         packet.position.y()/packet.position.w(),
814                                                                         packet.position.z()/packet.position.w(),
815                                                                         1.0f               /packet.position.w());
816         }
817
818         // To window coords
819         {
820                 const WindowRectangle&  viewport        = state.viewport.rect;
821                 const float                             halfW           = (float)(viewport.width) / 2.0f;
822                 const float                             halfH           = (float)(viewport.height) / 2.0f;
823                 const float                             oX                      = (float)viewport.left + halfW;
824                 const float                             oY                      = (float)viewport.bottom + halfH;
825                 const float                             zn                      = state.viewport.zn;
826                 const float                             zf                      = state.viewport.zf;
827
828                 packet.position = tcu::Vec4(packet.position.x()*halfW + oX,
829                                                                         packet.position.y()*halfH + oY,
830                                                                         packet.position.z()*(zf - zn)/2.0f + (zn + zf)/2.0f,
831                                                                         packet.position.w());
832         }
833 }
834
835 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Triangle& target)
836 {
837         transformVertexClipCoordsToWindowCoords(state, *target.v0);
838         transformVertexClipCoordsToWindowCoords(state, *target.v1);
839         transformVertexClipCoordsToWindowCoords(state, *target.v2);
840 }
841
842 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Line& target)
843 {
844         transformVertexClipCoordsToWindowCoords(state, *target.v0);
845         transformVertexClipCoordsToWindowCoords(state, *target.v1);
846 }
847
848 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Point& target)
849 {
850         transformVertexClipCoordsToWindowCoords(state, *target.v0);
851 }
852
853 template <typename ContainerType>
854 void transformClipCoordsToWindowCoords (const RenderState& state, ContainerType& list)
855 {
856         for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
857                 transformPrimitiveClipCoordsToWindowCoords(state, *it);
858 }
859
860 void makeSharedVerticeDistinct (VertexPacket*& packet, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
861 {
862         // distinct
863         if (vertices.find(packet) == vertices.end())
864         {
865                 vertices.insert(packet);
866         }
867         else
868         {
869                 VertexPacket* newPacket = vpalloc.alloc();
870
871                 // copy packet output values
872                 newPacket->position             = packet->position;
873                 newPacket->pointSize    = packet->pointSize;
874                 newPacket->primitiveID  = packet->primitiveID;
875
876                 for (size_t outputNdx = 0; outputNdx < vpalloc.getNumVertexOutputs(); ++outputNdx)
877                         newPacket->outputs[outputNdx] = packet->outputs[outputNdx];
878
879                 // no need to insert new packet to "vertices" as newPacket is unique
880                 packet = newPacket;
881         }
882 }
883
884 void makeSharedVerticesDistinct (pa::Triangle& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
885 {
886         makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
887         makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
888         makeSharedVerticeDistinct(target.v2, vertices, vpalloc);
889 }
890
891 void makeSharedVerticesDistinct (pa::Line& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
892 {
893         makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
894         makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
895 }
896
897 void makeSharedVerticesDistinct (pa::Point& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
898 {
899         makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
900 }
901
902 template <typename ContainerType>
903 void makeSharedVerticesDistinct (ContainerType& list, VertexPacketAllocator& vpalloc)
904 {
905         std::set<VertexPacket*, std::less<void*> > vertices;
906
907         for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
908                 makeSharedVerticesDistinct(*it, vertices, vpalloc);
909 }
910
911 void generatePrimitiveIDs (pa::Triangle& target, int id)
912 {
913         target.v0->primitiveID = id;
914         target.v1->primitiveID = id;
915         target.v2->primitiveID = id;
916 }
917
918 void generatePrimitiveIDs (pa::Line& target, int id)
919 {
920         target.v0->primitiveID = id;
921         target.v1->primitiveID = id;
922 }
923
924 void generatePrimitiveIDs (pa::Point& target, int id)
925 {
926         target.v0->primitiveID = id;
927 }
928
929 template <typename ContainerType>
930 void generatePrimitiveIDs (ContainerType& list, DrawContext& drawContext)
931 {
932         for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
933                 generatePrimitiveIDs(*it, drawContext.primitiveID++);
934 }
935
936 static float findTriangleVertexDepthSlope (const tcu::Vec4& p, const tcu::Vec4& v0, const tcu::Vec4& v1)
937 {
938         // screen space
939         const tcu::Vec3 ssp             =  p.swizzle(0, 1, 2);
940         const tcu::Vec3 ssv0    = v0.swizzle(0, 1, 2);
941         const tcu::Vec3 ssv1    = v1.swizzle(0, 1, 2);
942
943         // dx & dy
944
945         const tcu::Vec3 a               = ssv0.swizzle(0,1,2) - ssp.swizzle(0,1,2);
946         const tcu::Vec3 b               = ssv1.swizzle(0,1,2) - ssp.swizzle(0,1,2);
947         const float             epsilon = 0.0001f;
948         const float             det             = (a.x() * b.y() - b.x() * a.y());
949
950         // degenerate triangle, it won't generate any fragments anyway. Return value doesn't matter
951         if (de::abs(det) < epsilon)
952                 return 0.0f;
953
954         const tcu::Vec2 dxDir   = tcu::Vec2( b.y(), -a.y()) / det;
955         const tcu::Vec2 dyDir   = tcu::Vec2(-b.x(),  a.x()) / det;
956
957         const float             dzdx    = dxDir.x() * a.z() + dxDir.y() * b.z();
958         const float             dzdy    = dyDir.x() * a.z() + dyDir.y() * b.z();
959
960         // approximate using max(|dz/dx|, |dz/dy|)
961         return de::max(de::abs(dzdx), de::abs(dzdy));
962 }
963
964 static float findPrimitiveMaximumDepthSlope (const pa::Triangle& triangle)
965 {
966         const float d1 = findTriangleVertexDepthSlope(triangle.v0->position, triangle.v1->position, triangle.v2->position);
967         const float d2 = findTriangleVertexDepthSlope(triangle.v1->position, triangle.v2->position, triangle.v0->position);
968         const float d3 = findTriangleVertexDepthSlope(triangle.v2->position, triangle.v0->position, triangle.v1->position);
969
970         return de::max(d1, de::max(d2, d3));
971 }
972
973 static float getFloatingPointMinimumResolvableDifference (float maxZValue, tcu::TextureFormat::ChannelType type)
974 {
975         if (type == tcu::TextureFormat::FLOAT)
976         {
977                 // 32f
978                 const int maxExponent = tcu::Float32(maxZValue).exponent();
979                 return tcu::Float32::construct(+1, maxExponent - 23, 1 << 23).asFloat();
980         }
981
982         // unexpected format
983         DE_ASSERT(false);
984         return 0.0f;
985 }
986
987 static float getFixedPointMinimumResolvableDifference (int numBits)
988 {
989         return tcu::Float32::construct(+1, -numBits, 1 << 23).asFloat();
990 }
991
992 static float findPrimitiveMinimumResolvableDifference (const pa::Triangle& triangle, const rr::MultisampleConstPixelBufferAccess& depthAccess)
993 {
994         const float                                                             maxZvalue               = de::max(de::max(triangle.v0->position.z(), triangle.v1->position.z()), triangle.v2->position.z());
995         const tcu::TextureFormat                                format                  = depthAccess.raw().getFormat();
996         const tcu::TextureFormat::ChannelOrder  order                   = format.order;
997
998         if (order == tcu::TextureFormat::D)
999         {
1000                 // depth only
1001                 const tcu::TextureFormat::ChannelType   channelType             = format.type;
1002                 const tcu::TextureChannelClass                  channelClass    = tcu::getTextureChannelClass(channelType);
1003                 const int                                                               numBits                 = tcu::getTextureFormatBitDepth(format).x();
1004
1005                 if (channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
1006                         return getFloatingPointMinimumResolvableDifference(maxZvalue, channelType);
1007                 else
1008                         // \note channelClass might be CLASS_LAST but that's ok
1009                         return getFixedPointMinimumResolvableDifference(numBits);
1010         }
1011         else if (order == tcu::TextureFormat::DS)
1012         {
1013                 // depth stencil, special cases for possible combined formats
1014                 if (format.type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
1015                         return getFloatingPointMinimumResolvableDifference(maxZvalue, tcu::TextureFormat::FLOAT);
1016                 else if (format.type == tcu::TextureFormat::UNSIGNED_INT_24_8)
1017                         return getFixedPointMinimumResolvableDifference(24);
1018         }
1019
1020         // unexpected format
1021         DE_ASSERT(false);
1022         return 0.0f;
1023 }
1024
1025 void writeFragmentPackets (const RenderState&                                   state,
1026                                                    const RenderTarget&                                  renderTarget,
1027                                                    const Program&                                               program,
1028                                                    const FragmentPacket*                                fragmentPackets,
1029                                                    int                                                                  numRasterizedPackets,
1030                                                    rr::FaceType                                                 facetype,
1031                                                    const std::vector<rr::GenericVec4>&  fragmentOutputArray,
1032                                                    const float*                                                 depthValues,
1033                                                    std::vector<Fragment>&                               fragmentBuffer)
1034 {
1035         const int                       numSamples              = renderTarget.getNumSamples();
1036         const size_t            numOutputs              = program.fragmentShader->getOutputs().size();
1037         FragmentProcessor       fragProcessor;
1038
1039         DE_ASSERT(fragmentOutputArray.size() >= (size_t)numRasterizedPackets*4*numOutputs);
1040         DE_ASSERT(fragmentBuffer.size()      >= (size_t)numRasterizedPackets*4);
1041
1042         // Translate fragments but do not set the value yet
1043         {
1044                 int     fragCount = 0;
1045                 for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
1046                 for (int fragNdx = 0; fragNdx < 4; fragNdx++)
1047                 {
1048                         const FragmentPacket&   packet  = fragmentPackets[packetNdx];
1049                         const int                               xo              = fragNdx%2;
1050                         const int                               yo              = fragNdx/2;
1051
1052                         if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
1053                         {
1054                                 Fragment& fragment              = fragmentBuffer[fragCount++];
1055
1056                                 fragment.pixelCoord             = packet.position + tcu::IVec2(xo, yo);
1057                                 fragment.coverage               = (deUint32)((packet.coverage & getCoverageFragmentSampleBits(numSamples, xo, yo)) >> getCoverageOffset(numSamples, xo, yo));
1058                                 fragment.sampleDepths   = (depthValues) ? (&depthValues[(packetNdx*4 + yo*2 + xo)*numSamples]) : (DE_NULL);
1059                         }
1060                 }
1061         }
1062
1063         // Set per output output values
1064         {
1065                 rr::FragmentOperationState noStencilDepthWriteState(state.fragOps);
1066                 noStencilDepthWriteState.depthMask                                              = false;
1067                 noStencilDepthWriteState.stencilStates[facetype].sFail  = STENCILOP_KEEP;
1068                 noStencilDepthWriteState.stencilStates[facetype].dpFail = STENCILOP_KEEP;
1069                 noStencilDepthWriteState.stencilStates[facetype].dpPass = STENCILOP_KEEP;
1070
1071                 int     fragCount = 0;
1072                 for (size_t outputNdx = 0; outputNdx < numOutputs; ++outputNdx)
1073                 {
1074                         // Only the last output-pass has default state, other passes have stencil & depth writemask=0
1075                         const rr::FragmentOperationState& fragOpsState = (outputNdx == numOutputs-1) ? (state.fragOps) : (noStencilDepthWriteState);
1076
1077                         for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
1078                         for (int fragNdx = 0; fragNdx < 4; fragNdx++)
1079                         {
1080                                 const FragmentPacket&   packet  = fragmentPackets[packetNdx];
1081                                 const int                               xo              = fragNdx%2;
1082                                 const int                               yo              = fragNdx/2;
1083
1084                                 // Add only fragments that have live samples to shaded fragments queue.
1085                                 if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
1086                                 {
1087                                         Fragment& fragment              = fragmentBuffer[fragCount++];
1088                                         fragment.value                  = fragmentOutputArray[(packetNdx*4 + fragNdx) * numOutputs + outputNdx];
1089                                 }
1090                         }
1091
1092                         // Execute per-fragment ops and write
1093                         fragProcessor.render(renderTarget.getColorBuffer((int)outputNdx), renderTarget.getDepthBuffer(), renderTarget.getStencilBuffer(), &fragmentBuffer[0], fragCount, facetype, fragOpsState);
1094                 }
1095         }
1096 }
1097
1098 void rasterizePrimitive (const RenderState&                                     state,
1099                                                  const RenderTarget&                            renderTarget,
1100                                                  const Program&                                         program,
1101                                                  const pa::Triangle&                            triangle,
1102                                                  const tcu::IVec4&                                      renderTargetRect,
1103                                                  RasterizationInternalBuffers&          buffers)
1104 {
1105         const int                       numSamples              = renderTarget.getNumSamples();
1106         const float                     depthClampMin   = de::min(state.viewport.zn, state.viewport.zf);
1107         const float                     depthClampMax   = de::max(state.viewport.zn, state.viewport.zf);
1108         TriangleRasterizer      rasterizer              (renderTargetRect, numSamples, state.rasterization);
1109         float                           depthOffset             = 0.0f;
1110
1111         rasterizer.init(triangle.v0->position, triangle.v1->position, triangle.v2->position);
1112
1113         // Culling
1114         const FaceType visibleFace = rasterizer.getVisibleFace();
1115         if ((state.cullMode == CULLMODE_FRONT   && visibleFace == FACETYPE_FRONT) ||
1116                 (state.cullMode == CULLMODE_BACK        && visibleFace == FACETYPE_BACK))
1117                 return;
1118
1119         // Shading context
1120         FragmentShadingContext shadingContext(triangle.v0->outputs, triangle.v1->outputs, triangle.v2->outputs, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, triangle.v2->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples, rasterizer.getVisibleFace());
1121
1122         // Polygon offset
1123         if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
1124         {
1125                 const float maximumDepthSlope                   = findPrimitiveMaximumDepthSlope(triangle);
1126                 const float minimumResolvableDifference = findPrimitiveMinimumResolvableDifference(triangle, renderTarget.getDepthBuffer());
1127
1128                 depthOffset = maximumDepthSlope * state.fragOps.polygonOffsetFactor + minimumResolvableDifference * state.fragOps.polygonOffsetUnits;
1129         }
1130
1131         // Execute rasterize - shade - write loop
1132         for (;;)
1133         {
1134                 const int       maxFragmentPackets              = (int)buffers.fragmentPackets.size();
1135                 int                     numRasterizedPackets    = 0;
1136
1137                 // Rasterize
1138
1139                 rasterizer.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1140
1141                 // numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1142
1143                 if (!numRasterizedPackets)
1144                         break; // Rasterization finished.
1145
1146                 // Polygon offset
1147                 if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
1148                         for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1149                                 buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx] + depthOffset, 0.0f, 1.0f);
1150
1151                 // Shade
1152
1153                 program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1154
1155                 // Depth clamp
1156                 if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1157                         for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1158                                 buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1159
1160                 // Handle fragment shader outputs
1161
1162                 writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, visibleFace, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1163         }
1164 }
1165
1166 void rasterizePrimitive (const RenderState&                                     state,
1167                                                  const RenderTarget&                            renderTarget,
1168                                                  const Program&                                         program,
1169                                                  const pa::Line&                                        line,
1170                                                  const tcu::IVec4&                                      renderTargetRect,
1171                                                  RasterizationInternalBuffers&          buffers)
1172 {
1173         const int                                       numSamples                      = renderTarget.getNumSamples();
1174         const float                                     depthClampMin           = de::min(state.viewport.zn, state.viewport.zf);
1175         const float                                     depthClampMax           = de::max(state.viewport.zn, state.viewport.zf);
1176         const bool                                      msaa                            = numSamples > 1;
1177         FragmentShadingContext          shadingContext          (line.v0->outputs, line.v1->outputs, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, line.v1->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples, FACETYPE_FRONT);
1178         SingleSampleLineRasterizer      aliasedRasterizer       (renderTargetRect);
1179         MultiSampleLineRasterizer       msaaRasterizer          (numSamples, renderTargetRect);
1180
1181         // Initialize rasterization.
1182         if (msaa)
1183                 msaaRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
1184         else
1185                 aliasedRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
1186
1187         for (;;)
1188         {
1189                 const int       maxFragmentPackets              = (int)buffers.fragmentPackets.size();
1190                 int                     numRasterizedPackets    = 0;
1191
1192                 // Rasterize
1193
1194                 if (msaa)
1195                         msaaRasterizer.rasterize        (&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1196                 else
1197                         aliasedRasterizer.rasterize     (&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1198
1199                 // numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1200
1201                 if (!numRasterizedPackets)
1202                         break; // Rasterization finished.
1203
1204                 // Shade
1205
1206                 program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1207
1208                 // Depth clamp
1209                 if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1210                         for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1211                                 buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1212
1213                 // Handle fragment shader outputs
1214
1215                 writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1216         }
1217 }
1218
1219 void rasterizePrimitive (const RenderState&                                     state,
1220                                                  const RenderTarget&                            renderTarget,
1221                                                  const Program&                                         program,
1222                                                  const pa::Point&                                       point,
1223                                                  const tcu::IVec4&                                      renderTargetRect,
1224                                                  RasterizationInternalBuffers&          buffers)
1225 {
1226         const int                       numSamples              = renderTarget.getNumSamples();
1227         const float                     depthClampMin   = de::min(state.viewport.zn, state.viewport.zf);
1228         const float                     depthClampMax   = de::max(state.viewport.zn, state.viewport.zf);
1229         TriangleRasterizer      rasterizer1             (renderTargetRect, numSamples, state.rasterization);
1230         TriangleRasterizer      rasterizer2             (renderTargetRect, numSamples, state.rasterization);
1231
1232         // draw point as two triangles
1233         const float offset                              = point.v0->pointSize / 2.0f;
1234         const tcu::Vec4         w0                      = tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
1235         const tcu::Vec4         w1                      = tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
1236         const tcu::Vec4         w2                      = tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
1237         const tcu::Vec4         w3                      = tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
1238
1239         rasterizer1.init(w0, w1, w2);
1240         rasterizer2.init(w0, w2, w3);
1241
1242         // Shading context
1243         FragmentShadingContext shadingContext(point.v0->outputs, DE_NULL, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, point.v0->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples, FACETYPE_FRONT);
1244
1245         // Execute rasterize - shade - write loop
1246         for (;;)
1247         {
1248                 const int       maxFragmentPackets              = (int)buffers.fragmentPackets.size();
1249                 int                     numRasterizedPackets    = 0;
1250
1251                 // Rasterize both triangles
1252
1253                 rasterizer1.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1254                 if (numRasterizedPackets != maxFragmentPackets)
1255                 {
1256                         float* const    depthBufferAppendPointer        = (buffers.fragmentDepthBuffer) ? (buffers.fragmentDepthBuffer + numRasterizedPackets*numSamples*4) : (DE_NULL);
1257                         int                             numRasterizedPackets2           = 0;
1258
1259                         rasterizer2.rasterize(&buffers.fragmentPackets[numRasterizedPackets], depthBufferAppendPointer, maxFragmentPackets - numRasterizedPackets, numRasterizedPackets2);
1260
1261                         numRasterizedPackets += numRasterizedPackets2;
1262                 }
1263
1264                 // numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1265
1266                 if (!numRasterizedPackets)
1267                         break; // Rasterization finished.
1268
1269                 // Shade
1270
1271                 program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1272
1273                 // Depth clamp
1274                 if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1275                         for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1276                                 buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1277
1278                 // Handle fragment shader outputs
1279
1280                 writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1281         }
1282 }
1283
1284 template <typename ContainerType>
1285 void rasterize (const RenderState&                                      state,
1286                                 const RenderTarget&                                     renderTarget,
1287                                 const Program&                                          program,
1288                                 const ContainerType&                            list)
1289 {
1290         const int                                               numSamples                      = renderTarget.getNumSamples();
1291         const int                                               numFragmentOutputs      = (int)program.fragmentShader->getOutputs().size();
1292         const size_t                                    maxFragmentPackets      = 128;
1293
1294         const tcu::IVec4                                viewportRect            = tcu::IVec4(state.viewport.rect.left, state.viewport.rect.bottom, state.viewport.rect.width, state.viewport.rect.height);
1295         const tcu::IVec4                                bufferRect                      = getBufferSize(renderTarget.getColorBuffer(0));
1296         const tcu::IVec4                                renderTargetRect        = rectIntersection(viewportRect, bufferRect);
1297
1298         // shared buffers for all primitives
1299         std::vector<FragmentPacket>             fragmentPackets         (maxFragmentPackets);
1300         std::vector<GenericVec4>                shaderOutputs           (maxFragmentPackets*4*numFragmentOutputs);
1301         std::vector<Fragment>                   shadedFragments         (maxFragmentPackets*4);
1302         std::vector<float>                              depthValues                     (0);
1303         float*                                                  depthBufferPointer      = DE_NULL;
1304
1305         RasterizationInternalBuffers    buffers;
1306
1307         // calculate depth only if we have a depth buffer
1308         if (!isEmpty(renderTarget.getDepthBuffer()))
1309         {
1310                 depthValues.resize(maxFragmentPackets*4*numSamples);
1311                 depthBufferPointer = &depthValues[0];
1312         }
1313
1314         // set buffers
1315         buffers.fragmentPackets.swap(fragmentPackets);
1316         buffers.shaderOutputs.swap(shaderOutputs);
1317         buffers.shadedFragments.swap(shadedFragments);
1318         buffers.fragmentDepthBuffer = depthBufferPointer;
1319
1320         // rasterize
1321         for (typename ContainerType::const_iterator it = list.begin(); it != list.end(); ++it)
1322                 rasterizePrimitive(state, renderTarget, program, *it, renderTargetRect, buffers);
1323 }
1324
1325 /*--------------------------------------------------------------------*//*!
1326  * Draws transformed triangles, lines or points to render target
1327  *//*--------------------------------------------------------------------*/
1328 template <typename ContainerType>
1329 void drawBasicPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, ContainerType& primList, VertexPacketAllocator& vpalloc)
1330 {
1331         const bool clipZ = !state.fragOps.depthClampEnabled;
1332
1333         // Transform feedback
1334
1335         // Flatshading
1336         flatshadeVertices(program, primList);
1337
1338         // Clipping
1339         // \todo [jarkko] is creating & swapping std::vectors really a good solution?
1340         clipPrimitives(primList, program, clipZ, vpalloc);
1341
1342         // Transform vertices to window coords
1343         transformClipCoordsToWindowCoords(state, primList);
1344
1345         // Rasterize and paint
1346         rasterize(state, renderTarget, program, primList);
1347 }
1348
1349 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Point& in)
1350 {
1351         dst[0] = in.v0;
1352 }
1353
1354 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Line& in)
1355 {
1356         dst[0] = in.v0;
1357         dst[1] = in.v1;
1358 }
1359
1360 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Triangle& in)
1361 {
1362         dst[0] = in.v0;
1363         dst[1] = in.v1;
1364         dst[2] = in.v2;
1365 }
1366
1367 void copyVertexPacketPointers(const VertexPacket** dst, const pa::LineAdjacency& in)
1368 {
1369         dst[0] = in.v0;
1370         dst[1] = in.v1;
1371         dst[2] = in.v2;
1372         dst[3] = in.v3;
1373 }
1374
1375 void copyVertexPacketPointers(const VertexPacket** dst, const pa::TriangleAdjacency& in)
1376 {
1377         dst[0] = in.v0;
1378         dst[1] = in.v1;
1379         dst[2] = in.v2;
1380         dst[3] = in.v3;
1381         dst[4] = in.v4;
1382         dst[5] = in.v5;
1383 }
1384
1385 template <PrimitiveType DrawPrimitiveType> // \note DrawPrimitiveType  can only be Points, line_strip, or triangle_strip
1386 void drawGeometryShaderOutputAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, size_t numVertices, VertexPacketAllocator& vpalloc)
1387 {
1388         // Run primitive assembly for generated stream
1389
1390         const size_t                                                                                                                    assemblerPrimitiveCount         = PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
1391         std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType>  inputPrimitives                         (assemblerPrimitiveCount);
1392
1393         PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, numVertices, state.provokingVertexConvention); // \note input Primitives are baseType_t => only basic primitives (non adjacency) will compile
1394
1395         // Make shared vertices distinct
1396
1397         makeSharedVerticesDistinct(inputPrimitives, vpalloc);
1398
1399         // Draw assembled primitives
1400
1401         drawBasicPrimitives(state, renderTarget, program, inputPrimitives, vpalloc);
1402 }
1403
1404 template <PrimitiveType DrawPrimitiveType>
1405 void drawWithGeometryShader(const RenderState& state, const RenderTarget& renderTarget, const Program& program, std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>& input, DrawContext& drawContext)
1406 {
1407         // Vertices outputted by geometry shader may have different number of output variables than the original, create new memory allocator
1408         VertexPacketAllocator vpalloc(program.geometryShader->getOutputs().size());
1409
1410         // Run geometry shader for all primitives
1411         GeometryEmitter                                 emitter                 (vpalloc, program.geometryShader->getNumVerticesOut());
1412         std::vector<PrimitivePacket>    primitives              (input.size());
1413         const int                                               numInvocations  = (int)program.geometryShader->getNumInvocations();
1414         const int                                               verticesIn              = PrimitiveTypeTraits<DrawPrimitiveType>::Type::NUM_VERTICES;
1415
1416         for (size_t primitiveNdx = 0; primitiveNdx < input.size(); ++primitiveNdx)
1417         {
1418                 primitives[primitiveNdx].primitiveIDIn = drawContext.primitiveID++;
1419                 copyVertexPacketPointers(primitives[primitiveNdx].vertices, input[primitiveNdx]);
1420         }
1421
1422         if (primitives.empty())
1423                 return;
1424
1425         for (int invocationNdx = 0; invocationNdx < numInvocations; ++invocationNdx)
1426         {
1427                 // Shading invocation
1428
1429                 program.geometryShader->shadePrimitives(emitter, verticesIn, &primitives[0], (int)primitives.size(), invocationNdx);
1430
1431                 // Find primitives in the emitted vertices
1432
1433                 std::vector<VertexPacket*> emitted;
1434                 emitter.moveEmittedTo(emitted);
1435
1436                 for (size_t primitiveBegin = 0; primitiveBegin < emitted.size();)
1437                 {
1438                         size_t primitiveEnd;
1439
1440                         // Find primitive begin
1441                         if (!emitted[primitiveBegin])
1442                         {
1443                                 ++primitiveBegin;
1444                                 continue;
1445                         }
1446
1447                         // Find primitive end
1448
1449                         primitiveEnd = primitiveBegin + 1;
1450                         for (; (primitiveEnd < emitted.size()) && emitted[primitiveEnd]; ++primitiveEnd); // find primitive end
1451
1452                         // Draw range [begin, end)
1453
1454                         switch (program.geometryShader->getOutputType())
1455                         {
1456                                 case rr::GEOMETRYSHADEROUTPUTTYPE_POINTS:                       drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_POINTS>                      (state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1457                                 case rr::GEOMETRYSHADEROUTPUTTYPE_LINE_STRIP:           drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_LINE_STRIP>          (state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1458                                 case rr::GEOMETRYSHADEROUTPUTTYPE_TRIANGLE_STRIP:       drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>      (state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1459                                 default:
1460                                         DE_ASSERT(DE_FALSE);
1461                         }
1462
1463                         // Next primitive
1464                         primitiveBegin = primitiveEnd + 1;
1465                 }
1466         }
1467 }
1468
1469 /*--------------------------------------------------------------------*//*!
1470  * Assembles, tesselates, runs geometry shader and draws primitives of any type from vertex list.
1471  *//*--------------------------------------------------------------------*/
1472 template <PrimitiveType DrawPrimitiveType>
1473 void drawAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, int numVertices, DrawContext& drawContext, VertexPacketAllocator& vpalloc)
1474 {
1475         // Assemble primitives (deconstruct stips & loops)
1476         const size_t                                                                                                                    assemblerPrimitiveCount         = PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
1477         std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>              inputPrimitives                         (assemblerPrimitiveCount);
1478
1479         PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, (size_t)numVertices, state.provokingVertexConvention);
1480
1481         // Tesselate
1482         //if (state.tesselation)
1483         //      primList = state.tesselation.exec(primList);
1484
1485         // Geometry shader
1486         if (program.geometryShader)
1487         {
1488                 // If there is an active geometry shader, it will convert any primitive type to basic types
1489                 drawWithGeometryShader<DrawPrimitiveType>(state, renderTarget, program, inputPrimitives, drawContext);
1490         }
1491         else
1492         {
1493                 std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType> basePrimitives;
1494
1495                 // convert types from X_adjacency to X
1496                 convertPrimitiveToBaseType(basePrimitives, inputPrimitives);
1497
1498                 // Make shared vertices distinct. Needed for that the translation to screen space happens only once per vertex, and for flatshading
1499                 makeSharedVerticesDistinct(basePrimitives, vpalloc);
1500
1501                 // A primitive ID will be generated even if no geometry shader is active
1502                 generatePrimitiveIDs(basePrimitives, drawContext);
1503
1504                 // Draw as a basic type
1505                 drawBasicPrimitives(state, renderTarget, program, basePrimitives, vpalloc);
1506         }
1507 }
1508
1509 bool isValidCommand (const DrawCommand& command, int numInstances)
1510 {
1511         // numInstances should be valid
1512         if (numInstances < 0)
1513                 return false;
1514
1515         // Shaders should have the same varyings
1516         if (command.program.geometryShader)
1517         {
1518                 if (command.program.vertexShader->getOutputs() != command.program.geometryShader->getInputs())
1519                         return false;
1520
1521                 if (command.program.geometryShader->getOutputs() != command.program.fragmentShader->getInputs())
1522                         return false;
1523         }
1524         else
1525         {
1526                 if (command.program.vertexShader->getOutputs() != command.program.fragmentShader->getInputs())
1527                         return false;
1528         }
1529
1530         // Shader input/output types are set
1531         for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getInputs().size(); ++varyingNdx)
1532                 if (command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1533                         command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1534                         command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1535                         return false;
1536         for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getOutputs().size(); ++varyingNdx)
1537                 if (command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1538                         command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1539                         command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1540                         return false;
1541
1542         for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getInputs().size(); ++varyingNdx)
1543                 if (command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1544                         command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1545                         command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1546                         return false;
1547         for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
1548                 if (command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1549                         command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1550                         command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1551                         return false;
1552
1553         if (command.program.geometryShader)
1554         {
1555                 for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getInputs().size(); ++varyingNdx)
1556                         if (command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1557                                 command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1558                                 command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1559                                 return false;
1560                 for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getOutputs().size(); ++varyingNdx)
1561                         if (command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1562                                 command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1563                                 command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1564                                 return false;
1565         }
1566
1567         // Enough vertex inputs?
1568         if ((size_t)command.numVertexAttribs < command.program.vertexShader->getInputs().size())
1569                 return false;
1570
1571         // There is a fragment output sink for each output?
1572         if ((size_t)command.renderTarget.getNumColorBuffers() < command.program.fragmentShader->getOutputs().size())
1573                 return false;
1574
1575         // All destination buffers should have same number of samples and same size
1576         for (int outputNdx = 0; outputNdx < command.renderTarget.getNumColorBuffers(); ++outputNdx)
1577         {
1578                 if (getBufferSize(command.renderTarget.getColorBuffer(0)) != getBufferSize(command.renderTarget.getColorBuffer(outputNdx)))
1579                         return false;
1580
1581                 if (command.renderTarget.getNumSamples() != command.renderTarget.getColorBuffer(outputNdx).getNumSamples())
1582                         return false;
1583         }
1584
1585         // All destination buffers should have same basic type as matching fragment output
1586         for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
1587         {
1588                 const tcu::TextureChannelClass  colorbufferClass = tcu::getTextureChannelClass(command.renderTarget.getColorBuffer((int)varyingNdx).raw().getFormat().type);
1589                 const GenericVecType                    colorType                = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
1590
1591                 if (command.program.fragmentShader->getOutputs()[varyingNdx].type != colorType)
1592                         return false;
1593         }
1594
1595         // Integer values are flatshaded
1596         for (size_t outputNdx = 0; outputNdx < command.program.vertexShader->getOutputs().size(); ++outputNdx)
1597         {
1598                 if (!command.program.vertexShader->getOutputs()[outputNdx].flatshade &&
1599                         (command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
1600                          command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
1601                         return false;
1602         }
1603         if (command.program.geometryShader)
1604                 for (size_t outputNdx = 0; outputNdx < command.program.geometryShader->getOutputs().size(); ++outputNdx)
1605                 {
1606                         if (!command.program.geometryShader->getOutputs()[outputNdx].flatshade &&
1607                                 (command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
1608                                  command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
1609                                 return false;
1610                 }
1611
1612         // Draw primitive is valid for geometry shader
1613         if (command.program.geometryShader)
1614         {
1615                 if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_POINTS && command.primitives.getPrimitiveType() != PRIMITIVETYPE_POINTS)
1616                         return false;
1617
1618                 if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES &&
1619                         (command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES &&
1620                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP &&
1621                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_LOOP))
1622                         return false;
1623
1624                 if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES &&
1625                         (command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES &&
1626                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP &&
1627                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_FAN))
1628                         return false;
1629
1630                 if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES_ADJACENCY &&
1631                         (command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES_ADJACENCY &&
1632                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP_ADJACENCY))
1633                         return false;
1634
1635                 if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES_ADJACENCY &&
1636                         (command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES_ADJACENCY &&
1637                          command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY))
1638                         return false;
1639         }
1640
1641         return true;
1642 }
1643
1644 } // anonymous
1645
1646 RenderTarget::RenderTarget (const MultisamplePixelBufferAccess& colorMultisampleBuffer,
1647                                                         const MultisamplePixelBufferAccess& depthMultisampleBuffer,
1648                                                         const MultisamplePixelBufferAccess& stencilMultisampleBuffer)
1649         : m_numColorBuffers     (1)
1650         , m_depthBuffer         (MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(depthMultisampleBuffer.raw(), tcu::Sampler::MODE_DEPTH)))
1651         , m_stencilBuffer       (MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(stencilMultisampleBuffer.raw(), tcu::Sampler::MODE_STENCIL)))
1652 {
1653         m_colorBuffers[0] = colorMultisampleBuffer;
1654 }
1655
1656 int RenderTarget::getNumSamples (void) const
1657 {
1658         DE_ASSERT(m_numColorBuffers > 0);
1659         return m_colorBuffers[0].getNumSamples();
1660 }
1661
1662 DrawIndices::DrawIndices (const deUint32* ptr, int baseVertex_)
1663         : indices       (ptr)
1664         , indexType     (INDEXTYPE_UINT32)
1665         , baseVertex(baseVertex_)
1666 {
1667 }
1668
1669 DrawIndices::DrawIndices (const deUint16* ptr, int baseVertex_)
1670         : indices       (ptr)
1671         , indexType     (INDEXTYPE_UINT16)
1672         , baseVertex(baseVertex_)
1673 {
1674 }
1675
1676 DrawIndices::DrawIndices (const deUint8* ptr, int baseVertex_)
1677         : indices       (ptr)
1678         , indexType     (INDEXTYPE_UINT8)
1679         , baseVertex(baseVertex_)
1680 {
1681 }
1682
1683 DrawIndices::DrawIndices (const void* ptr, IndexType type, int baseVertex_)
1684         : indices       (ptr)
1685         , indexType     (type)
1686         , baseVertex(baseVertex_)
1687 {
1688 }
1689
1690 PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const int firstElement)
1691         : m_primitiveType       (primitiveType)
1692         , m_numElements         (numElements)
1693         , m_indices                     (DE_NULL)
1694         , m_indexType           (INDEXTYPE_LAST)
1695         , m_baseVertex          (firstElement)
1696 {
1697         DE_ASSERT(numElements >= 0 && "Invalid numElements");
1698         DE_ASSERT(firstElement >= 0 && "Invalid firstElement");
1699 }
1700
1701 PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const DrawIndices& indices)
1702         : m_primitiveType       (primitiveType)
1703         , m_numElements         ((size_t)numElements)
1704         , m_indices                     (indices.indices)
1705         , m_indexType           (indices.indexType)
1706         , m_baseVertex          (indices.baseVertex)
1707 {
1708         DE_ASSERT(numElements >= 0 && "Invalid numElements");
1709 }
1710
1711 size_t PrimitiveList::getIndex (size_t elementNdx) const
1712 {
1713         // indices == DE_NULL interpreted as command.indices = [first (=baseVertex) + 0, first + 1, first + 2...]
1714         if (m_indices)
1715         {
1716                 int index = m_baseVertex + (int)readIndexArray(m_indexType, m_indices, elementNdx);
1717                 DE_ASSERT(index >= 0); // do not access indices < 0
1718
1719                 return (size_t)index;
1720         }
1721         else
1722                 return (size_t)(m_baseVertex) + elementNdx;
1723 }
1724
1725 bool PrimitiveList::isRestartIndex (size_t elementNdx, deUint32 restartIndex) const
1726 {
1727         // implicit index or explicit index (without base vertex) equals restart
1728         if (m_indices)
1729                 return readIndexArray(m_indexType, m_indices, elementNdx) == restartIndex;
1730         else
1731                 return elementNdx == (size_t)restartIndex;
1732 }
1733
1734 Renderer::Renderer (void)
1735 {
1736 }
1737
1738 Renderer::~Renderer (void)
1739 {
1740 }
1741
1742 void Renderer::draw (const DrawCommand& command) const
1743 {
1744         drawInstanced(command, 1);
1745 }
1746
1747 void Renderer::drawInstanced (const DrawCommand& command, int numInstances) const
1748 {
1749         // Do not run bad commands
1750         {
1751                 const bool validCommand = isValidCommand(command, numInstances);
1752                 if (!validCommand)
1753                 {
1754                         DE_ASSERT(false);
1755                         return;
1756                 }
1757         }
1758
1759         // Do not draw if nothing to draw
1760         {
1761                 if (command.primitives.getNumElements() == 0 || numInstances == 0)
1762                         return;
1763         }
1764
1765         // Prepare transformation
1766
1767         const size_t                            numVaryings = command.program.vertexShader->getOutputs().size();
1768         VertexPacketAllocator           vpalloc(numVaryings);
1769         std::vector<VertexPacket*>      vertexPackets = vpalloc.allocArray(command.primitives.getNumElements());
1770         DrawContext                                     drawContext;
1771
1772         for (int instanceID = 0; instanceID < numInstances; ++instanceID)
1773         {
1774                 // Each instance has its own primitives
1775                 drawContext.primitiveID = 0;
1776
1777                 for (size_t elementNdx = 0; elementNdx < command.primitives.getNumElements(); ++elementNdx)
1778                 {
1779                         int numVertexPackets = 0;
1780
1781                         // collect primitive vertices until restart
1782
1783                         while (elementNdx < command.primitives.getNumElements() &&
1784                                         !(command.state.restart.enabled && command.primitives.isRestartIndex(elementNdx, command.state.restart.restartIndex)))
1785                         {
1786                                 // input
1787                                 vertexPackets[numVertexPackets]->instanceNdx    = instanceID;
1788                                 vertexPackets[numVertexPackets]->vertexNdx              = (int)command.primitives.getIndex(elementNdx);
1789
1790                                 // output
1791                                 vertexPackets[numVertexPackets]->pointSize              = command.state.point.pointSize;        // default value from the current state
1792                                 vertexPackets[numVertexPackets]->position               = tcu::Vec4(0, 0, 0, 0);                        // no undefined values
1793
1794                                 ++numVertexPackets;
1795                                 ++elementNdx;
1796                         }
1797
1798                         // Duplicated restart shade
1799                         if (numVertexPackets == 0)
1800                                 continue;
1801
1802                         // \todo Vertex cache?
1803
1804                         // Transform vertices
1805
1806                         command.program.vertexShader->shadeVertices(command.vertexAttribs, &vertexPackets[0], numVertexPackets);
1807
1808                         // Draw primitives
1809
1810                         switch (command.primitives.getPrimitiveType())
1811                         {
1812                                 case PRIMITIVETYPE_TRIANGLES:                           { drawAsPrimitives<PRIMITIVETYPE_TRIANGLES>                                     (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1813                                 case PRIMITIVETYPE_TRIANGLE_STRIP:                      { drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>                        (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1814                                 case PRIMITIVETYPE_TRIANGLE_FAN:                        { drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_FAN>                          (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1815                                 case PRIMITIVETYPE_LINES:                                       { drawAsPrimitives<PRIMITIVETYPE_LINES>                                         (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1816                                 case PRIMITIVETYPE_LINE_STRIP:                          { drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP>                            (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1817                                 case PRIMITIVETYPE_LINE_LOOP:                           { drawAsPrimitives<PRIMITIVETYPE_LINE_LOOP>                                     (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1818                                 case PRIMITIVETYPE_POINTS:                                      { drawAsPrimitives<PRIMITIVETYPE_POINTS>                                        (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1819                                 case PRIMITIVETYPE_LINES_ADJACENCY:                     { drawAsPrimitives<PRIMITIVETYPE_LINES_ADJACENCY>                       (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1820                                 case PRIMITIVETYPE_LINE_STRIP_ADJACENCY:        { drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP_ADJACENCY>          (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1821                                 case PRIMITIVETYPE_TRIANGLES_ADJACENCY:         { drawAsPrimitives<PRIMITIVETYPE_TRIANGLES_ADJACENCY>           (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1822                                 case PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY:{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY>  (command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);      break; }
1823                                 default:
1824                                         DE_ASSERT(DE_FALSE);
1825                         }
1826                 }
1827         }
1828 }
1829
1830 } // rr