Merge "Add the support to device connection via TCP/IP" into marshmallow-cts-dev...
[platform/upstream/VK-GL-CTS.git] / framework / referencerenderer / rrFragmentOperations.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference implementation for per-fragment operations.
22  *//*--------------------------------------------------------------------*/
23
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include <limits>
28
29 using tcu::IVec2;
30 using tcu::Vec3;
31 using tcu::Vec4;
32 using tcu::IVec4;
33 using tcu::UVec4;
34 using tcu::min;
35 using tcu::max;
36 using tcu::clamp;
37 using de::min;
38 using de::max;
39 using de::clamp;
40
41 namespace rr
42 {
43
44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
46 {
47         return (oldValue & ~mask) | (newValue & mask);
48 }
49
50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
51 {
52         return de::inBounds(point.x(), rect.left,               rect.left + rect.width) &&
53                    de::inBounds(point.y(), rect.bottom,         rect.bottom + rect.height);
54 }
55
56 static inline Vec4 unpremultiply (const Vec4& v)
57 {
58         if (v.w() > 0.0f)
59                 return Vec4(v.x()/v.w(), v.y()/v.w(), v.z()/v.w(), v.w());
60         else
61         {
62                 DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
63                 return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
64         }
65 }
66
67 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const Vec4& v,      const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                                }
68 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const IVec4& v,     const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                                }
69 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const UVec4& v,     const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());    }
70 void clearMultisampleDepthBuffer        (const tcu::PixelBufferAccess& dst, float v,            const WindowRectangle& r)       { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                   }
71 void clearMultisampleStencilBuffer      (const tcu::PixelBufferAccess& dst, int v,                      const WindowRectangle& r)       { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                 }
72
73 FragmentProcessor::FragmentProcessor (void)
74         : m_sampleRegister()
75 {
76 }
77
78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
79 {
80         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
81         {
82                 if (m_sampleRegister[regSampleNdx].isAlive)
83                 {
84                         int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
85
86                         if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
87                                 m_sampleRegister[regSampleNdx].isAlive = false;
88                 }
89         }
90 }
91
92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
93 {
94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)                                                                                                                                                                     \
95         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                 \
96         {                                                                                                                                                                                                                                                                               \
97                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                     \
98                 {                                                                                                                                                                                                                                                                       \
99                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                 \
100                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                   \
101                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
102                         int                                     maskedRef                       = stencilState.compMask & clampedStencilRef;                                                                                    \
103                         int                                     maskedBuf                       = stencilState.compMask & stencilBufferValue;                                                                                   \
104                         DE_UNREF(maskedRef);                                                                                                                                                                                                                    \
105                         DE_UNREF(maskedBuf);                                                                                                                                                                                                                    \
106                                                                                                                                                                                                                                                                                         \
107                         m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);                                                                                                                    \
108                 }                                                                                                                                                                                                                                                                       \
109         }
110
111         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
112
113         switch (stencilState.func)
114         {
115                 case TESTFUNC_NEVER:    SAMPLE_REGISTER_STENCIL_COMPARE(false)                                          break;
116                 case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_STENCIL_COMPARE(true)                                           break;
117                 case TESTFUNC_LESS:             SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)         break;
118                 case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)         break;
119                 case TESTFUNC_GREATER:  SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)         break;
120                 case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)         break;
121                 case TESTFUNC_EQUAL:    SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)         break;
122                 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)         break;
123                 default:
124                         DE_ASSERT(false);
125         }
126
127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
128 }
129
130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
131 {
132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)                                                                                                                                                                                                                                                                         \
133         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                                                                                                 \
134         {                                                                                                                                                                                                                                                                                                                                                               \
135                 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)                                                                                                                                                            \
136                 {                                                                                                                                                                                                                                                                                                                                                       \
137                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                                                                                                 \
138                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                                                                                                   \
139                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                                                                                 \
140                                                                                                                                                                                                                                                                                                                                                                         \
141                         stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
142                         m_sampleRegister[regSampleNdx].isAlive = false;                                                                                                                                                                                                                                                 \
143                 }                                                                                                                                                                                                                                                                                                                                                       \
144         }
145
146         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
147
148         switch (stencilState.sFail)
149         {
150                 case STENCILOP_KEEP:            SAMPLE_REGISTER_SFAIL(stencilBufferValue)                                                                                               break;
151                 case STENCILOP_ZERO:            SAMPLE_REGISTER_SFAIL(0)                                                                                                                                break;
152                 case STENCILOP_REPLACE:         SAMPLE_REGISTER_SFAIL(clampedStencilRef)                                                                                                break;
153                 case STENCILOP_INCR:            SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))              break;
154                 case STENCILOP_DECR:            SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))              break;
155                 case STENCILOP_INCR_WRAP:       SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))                             break;
156                 case STENCILOP_DECR_WRAP:       SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))                             break;
157                 case STENCILOP_INVERT:          SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))                                break;
158                 default:
159                         DE_ASSERT(false);
160         }
161
162 #undef SAMPLE_REGISTER_SFAIL
163 }
164
165 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
166 {
167 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)                                                                                                                                                                             \
168         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                         \
169         {                                                                                                                                                                                                                                                                                       \
170                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                             \
171                 {                                                                                                                                                                                                                                                                               \
172                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                         \
173                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                           \
174                         float                           depthBufferValue        = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                     \
175                         float                           sampleDepthFloat        = frag.sampleDepths[fragSampleNdx];                                                                                                                     \
176                         float                           sampleDepth                     = de::clamp(sampleDepthFloat, 0.0f, 1.0f);                                                                                                      \
177                                                                                                                                                                                                                                                                                                 \
178                         m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                                                                                                                                      \
179                                                                                                                                                                                                                                                                                                 \
180                         DE_UNREF(depthBufferValue);                                                                                                                                                                                                                     \
181                         DE_UNREF(sampleDepth);                                                                                                                                                                                                                          \
182                 }                                                                                                                                                                                                                                                                               \
183         }
184
185 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)                                                                                                                                                                    \
186         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                         \
187         {                                                                                                                                                                                                                                                                                       \
188                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                             \
189                 {                                                                                                                                                                                                                                                                               \
190                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                         \
191                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                           \
192                         deUint32                        depthBufferValue        = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();        \
193                         float                           sampleDepthFloat        = frag.sampleDepths[fragSampleNdx];                                                                                                                     \
194                                                                                                                                                                                                                                                                                                 \
195                         /* Convert input float to target buffer format for comparison */                                                                                                                                        \
196                                                                                                                                                                                                                                                                                                 \
197                         deUint32 buffer[2];                                                                                                                                                                                                                                     \
198                                                                                                                                                                                                                                                                                                 \
199                         DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());                                                                                                            \
200                                                                                                                                                                                                                                                                                                 \
201                         tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);                                                                                                                       \
202                         access.setPixDepth(sampleDepthFloat, 0, 0, 0);                                                                                                                                                                          \
203                         deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();                                                                                                                                                        \
204                                                                                                                                                                                                                                                                                                 \
205                         m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                                                                                                                                      \
206                                                                                                                                                                                                                                                                                                 \
207                         DE_UNREF(depthBufferValue);                                                                                                                                                                                                                     \
208                         DE_UNREF(sampleDepth);                                                                                                                                                                                                                          \
209                 }                                                                                                                                                                                                                                                                               \
210         }
211
212         if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
213         {
214
215                 switch (depthFunc)
216                 {
217                         case TESTFUNC_NEVER:    SAMPLE_REGISTER_DEPTH_COMPARE_F(false)                                                  break;
218                         case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_DEPTH_COMPARE_F(true)                                                           break;
219                         case TESTFUNC_LESS:             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)        break;
220                         case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)        break;
221                         case TESTFUNC_GREATER:  SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)        break;
222                         case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)        break;
223                         case TESTFUNC_EQUAL:    SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)        break;
224                         case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)        break;
225                         default:
226                                 DE_ASSERT(false);
227                 }
228
229         }
230         else
231         {
232                 switch (depthFunc)
233                 {
234                         case TESTFUNC_NEVER:    SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)                                                 break;
235                         case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)                                                          break;
236                         case TESTFUNC_LESS:             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)       break;
237                         case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)       break;
238                         case TESTFUNC_GREATER:  SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)       break;
239                         case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)       break;
240                         case TESTFUNC_EQUAL:    SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)       break;
241                         case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)       break;
242                         default:
243                                 DE_ASSERT(false);
244                 }
245         }
246
247 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
248 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
249 }
250
251 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
252 {
253         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
254         {
255                 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
256                 {
257                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
258                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
259                         const float                     clampedDepth    = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
260
261                         depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
262                 }
263         }
264 }
265
266 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
267 {
268 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)                                                                                                                                                                                                                                 \
269         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                                                                                         \
270         {                                                                                                                                                                                                                                                                                                                                                       \
271                 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))                                                                                                                                                                                                                              \
272                 {                                                                                                                                                                                                                                                                                                                                               \
273                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                                                                                         \
274                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                                                                                           \
275                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                                                                         \
276                                                                                                                                                                                                                                                                                                                                                                 \
277                         stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());       \
278                 }                                                                                                                                                                                                                                                                                                                                               \
279         }
280
281 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)                                                                                                                                                                                                                     \
282                 switch (stencilState.OP_NAME)                                                                                                                                                                                                                                           \
283                 {                                                                                                                                                                                                                                                                                                       \
284                         case STENCILOP_KEEP:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)                                                                                         break;  \
285                         case STENCILOP_ZERO:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)                                                                                                                          break;  \
286                         case STENCILOP_REPLACE:         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)                                                                                          break;  \
287                         case STENCILOP_INCR:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))        break;  \
288                         case STENCILOP_DECR:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))        break;  \
289                         case STENCILOP_INCR_WRAP:       SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))                       break;  \
290                         case STENCILOP_DECR_WRAP:       SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))                       break;  \
291                         case STENCILOP_INVERT:          SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))                          break;  \
292                         default:                                                                                                                                                                                                                                                                                \
293                                 DE_ASSERT(false);                                                                                                                                                                                                                                                       \
294                 }
295
296         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
297
298         SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
299         SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
300
301 #undef SWITCH_DPFAIL_OR_DPPASS
302 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
303 }
304
305 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
306 {
307 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                                                                                                                                                            \
308         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                                 \
309         {                                                                                                                                                                                                                                                                                               \
310                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                                     \
311                 {                                                                                                                                                                                                                                                                                       \
312                         const Vec4& src         = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;                                                                                                                          \
313                         const Vec4& src1        = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;                                                                                                                         \
314                         const Vec4& dst         = m_sampleRegister[regSampleNdx].clampedBlendDstColor;                                                                                                                          \
315                         DE_UNREF(src);                                                                                                                                                                                                                                                  \
316                         DE_UNREF(src1);                                                                                                                                                                                                                                                 \
317                         DE_UNREF(dst);                                                                                                                                                                                                                                                  \
318                                                                                                                                                                                                                                                                                                         \
319                         m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);                                                                                                                                               \
320                 }                                                                                                                                                                                                                                                                                       \
321         }
322
323 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)                                                                                                                                                                    \
324         switch (blendRGBState.FUNC_NAME)                                                                                                                                                                                                                        \
325         {                                                                                                                                                                                                                                                                                       \
326                 case BLENDFUNC_ZERO:                                            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))                                                           break;  \
327                 case BLENDFUNC_ONE:                                                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))                                                           break;  \
328                 case BLENDFUNC_SRC_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))                                           break;  \
329                 case BLENDFUNC_ONE_MINUS_SRC_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))                      break;  \
330                 case BLENDFUNC_DST_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))                                           break;  \
331                 case BLENDFUNC_ONE_MINUS_DST_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))                      break;  \
332                 case BLENDFUNC_SRC_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))                                                        break;  \
333                 case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))                                         break;  \
334                 case BLENDFUNC_DST_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))                                                        break;  \
335                 case BLENDFUNC_ONE_MINUS_DST_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))                                         break;  \
336                 case BLENDFUNC_CONSTANT_COLOR:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))                            break;  \
337                 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))       break;  \
338                 case BLENDFUNC_CONSTANT_ALPHA:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))                                         break;  \
339                 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))                          break;  \
340                 case BLENDFUNC_SRC_ALPHA_SATURATE:                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))       break;  \
341                 case BLENDFUNC_SRC1_COLOR:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))                                          break;  \
342                 case BLENDFUNC_ONE_MINUS_SRC1_COLOR:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))                     break;  \
343                 case BLENDFUNC_SRC1_ALPHA:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))                                                       break;  \
344                 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))                                        break;  \
345                 default:                                                                                                                                                                                                                                                                \
346                         DE_ASSERT(false);                                                                                                                                                                                                                                       \
347         }
348
349         SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
350         SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
351
352 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
353 #undef SAMPLE_REGISTER_BLEND_FACTOR
354 }
355
356 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
357 {
358 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                                                                                                            \
359         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                 \
360         {                                                                                                                                                                                                                                               \
361                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                     \
362                 {                                                                                                                                                                                                                                       \
363                         const Vec4& src         = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;                                                                          \
364                         const Vec4& src1        = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;                                                                         \
365                         const Vec4& dst         = m_sampleRegister[regSampleNdx].clampedBlendDstColor;                                                                          \
366                         DE_UNREF(src);                                                                                                                                                                                                  \
367                         DE_UNREF(src1);                                                                                                                                                                                                 \
368                         DE_UNREF(dst);                                                                                                                                                                                                  \
369                                                                                                                                                                                                                                                         \
370                         m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);                                                                                               \
371                 }                                                                                                                                                                                                                                       \
372         }
373
374 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)                                                                                                                                              \
375         switch (blendAState.FUNC_NAME)                                                                                                                                                                                          \
376         {                                                                                                                                                                                                                                                       \
377                 case BLENDFUNC_ZERO:                                            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)                                         break;  \
378                 case BLENDFUNC_ONE:                                                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)                                         break;  \
379                 case BLENDFUNC_SRC_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())                                      break;  \
380                 case BLENDFUNC_ONE_MINUS_SRC_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())                       break;  \
381                 case BLENDFUNC_DST_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())                                      break;  \
382                 case BLENDFUNC_ONE_MINUS_DST_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())                       break;  \
383                 case BLENDFUNC_SRC_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())                                      break;  \
384                 case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())                       break;  \
385                 case BLENDFUNC_DST_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())                                      break;  \
386                 case BLENDFUNC_ONE_MINUS_DST_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())                       break;  \
387                 case BLENDFUNC_CONSTANT_COLOR:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())                       break;  \
388                 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())        break;  \
389                 case BLENDFUNC_CONSTANT_ALPHA:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())                       break;  \
390                 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())        break;  \
391                 case BLENDFUNC_SRC_ALPHA_SATURATE:                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)                                         break;  \
392                 case BLENDFUNC_SRC1_COLOR:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())                                     break;  \
393                 case BLENDFUNC_ONE_MINUS_SRC1_COLOR:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())                      break;  \
394                 case BLENDFUNC_SRC1_ALPHA:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())                                     break;  \
395                 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())                      break;  \
396                 default:                                                                                                                                                                                                                                \
397                         DE_ASSERT(false);                                                                                                                                                                                                       \
398         }
399
400         SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
401         SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
402
403 #undef SWITCH_SRC_OR_DST_FACTOR_A
404 #undef SAMPLE_REGISTER_BLEND_FACTOR
405 }
406
407 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
408 {
409 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)                                             \
410         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)         \
411         {                                                                                                                                                                       \
412                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                             \
413                 {                                                                                                                                                               \
414                         SampleData& sample              = m_sampleRegister[regSampleNdx];                                       \
415                         const Vec4& srcColor    = sample.clampedBlendSrcColor;                                          \
416                         const Vec4& dstColor    = sample.clampedBlendDstColor;                                          \
417                                                                                                                                                                                 \
418                         sample.COLOR_NAME = (COLOR_EXPRESSION);                                                                         \
419                 }                                                                                                                                                               \
420         }
421
422         switch (blendRGBState.equation)
423         {
424                 case BLENDEQUATION_ADD:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)  break;
425                 case BLENDEQUATION_SUBTRACT:                    SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)  break;
426                 case BLENDEQUATION_REVERSE_SUBTRACT:    SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)  break;
427                 case BLENDEQUATION_MIN:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))                                                                                                break;
428                 case BLENDEQUATION_MAX:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))                                                                                                break;
429                 default:
430                         DE_ASSERT(false);
431         }
432
433         switch (blendAState.equation)
434         {
435                 case BLENDEQUATION_ADD:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)      break;
436                 case BLENDEQUATION_SUBTRACT:                    SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)      break;
437                 case BLENDEQUATION_REVERSE_SUBTRACT:    SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)      break;
438                 case BLENDEQUATION_MIN:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))                                                                                        break;
439                 case BLENDEQUATION_MAX:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))                                                                                        break;
440                 default:
441                         DE_ASSERT(false);
442         }
443 #undef SAMPLE_REGISTER_BLENDED_COLOR
444 }
445
446 namespace advblend
447 {
448
// Multiply blend function: the product of the source and destination components.
inline float multiply (float src, float dst)
{
        return src*dst;
}
// Screen blend function: src + dst - src*dst.
inline float screen (float src, float dst)
{
        return src + dst - src*dst;
}
451 inline float    darken          (float src, float dst) { return de::min(src, dst);                      }
452 inline float    lighten         (float src, float dst) { return de::max(src, dst);                      }
453 inline float    difference      (float src, float dst) { return de::abs(dst-src);                       }
// Exclusion blend function: src + dst - 2*src*dst.
inline float exclusion (float src, float dst)
{
        return src + dst - 2.0f*src*dst;
}
455
// Overlay blend function. The branch is chosen by the destination component:
// 2*src*dst when dst <= 0.5, otherwise 1 - 2*(1-src)*(1-dst).
inline float overlay (float src, float dst)
{
        return (dst <= 0.5f) ? 2.0f*src*dst
                             : 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
}
463
464 inline float colordodge (float src, float dst)
465 {
466         if (dst <= 0.0f)
467                 return 0.0f;
468         else if (src < 1.0f)
469                 return de::min(1.0f, dst/(1.0f-src));
470         else
471                 return 1.0f;
472 }
473
474 inline float colorburn (float src, float dst)
475 {
476         if (dst >= 1.0f)
477                 return 1.0f;
478         else if (src > 0.0f)
479                 return 1.0f - de::min(1.0f, (1.0f-dst)/src);
480         else
481                 return 0.0f;
482 }
483
// Hard light blend function. Same formulas as overlay, but the branch is chosen
// by the source component: 2*src*dst when src <= 0.5, otherwise
// 1 - 2*(1-src)*(1-dst).
inline float hardlight (float src, float dst)
{
        return (src <= 0.5f) ? 2.0f*src*dst
                             : 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
}
491
492 inline float softlight (float src, float dst)
493 {
494         if (src <= 0.5f)
495                 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
496         else if (dst <= 0.25f)
497                 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
498         else
499                 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
500 }
501
502 inline float minComp (const Vec3& v)
503 {
504         return de::min(de::min(v.x(), v.y()), v.z());
505 }
506
507 inline float maxComp (const Vec3& v)
508 {
509         return de::max(de::max(v.x(), v.y()), v.z());
510 }
511
512 inline float luminosity (const Vec3& rgb)
513 {
514         return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
515 }
516
517 inline float saturation (const Vec3& rgb)
518 {
519         return maxComp(rgb) - minComp(rgb);
520 }
521
522 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
523 {
524         const float             lbase   = luminosity(cbase);
525         const float             llum    = luminosity(clum);
526         const float             ldiff   = llum - lbase;
527         const Vec3              color   = cbase + Vec3(ldiff);
528         const float             minC    = minComp(color);
529         const float             maxC    = maxComp(color);
530
531         if (minC < 0.0f)
532                 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
533         else if (maxC > 1.0f)
534                 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
535         else
536                 return color;
537 }
538
539 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
540 {
541         const float             minbase = minComp(cbase);
542         const float             sbase   = saturation(cbase);
543         const float             ssat    = saturation(csat);
544         Vec3                    color   = Vec3(0.0f);
545
546         if (sbase > 0.0f)
547                 color = (cbase - minbase) * ssat / sbase;
548         else
549                 color = color;
550
551         return setLum(color, clum);
552 }
553
554 } // advblend
555
556 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
557 {
558         using namespace advblend;
559
560 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)                                                                                        \
561         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                 \
562         {                                                                                                                                                                               \
563                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                     \
564                 {                                                                                                                                                                       \
565                         SampleData&     sample          = m_sampleRegister[regSampleNdx];                                               \
566                         const Vec4&     srcColor        = sample.clampedBlendSrcColor;                                                  \
567                         const Vec4&     dstColor        = sample.clampedBlendDstColor;                                                  \
568                         const Vec3&     bias            = sample.blendSrcFactorRGB;                                                             \
569                         const float     p0                      = sample.blendSrcFactorA;                                                               \
570                         const float     r                       = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \
571                         const float     g                       = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \
572                         const float     b                       = FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2]; \
573                                                                                                                                                                                         \
574                         sample.blendedRGB = Vec3(r, g, b);                                                                                              \
575                 }                                                                                                                                                                       \
576         }
577
578 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)                                                                         \
579         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                 \
580         {                                                                                                                                                                               \
581                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                     \
582                 {                                                                                                                                                                       \
583                         SampleData&     sample          = m_sampleRegister[regSampleNdx];                                               \
584                         const Vec3      srcColor        = sample.clampedBlendSrcColor.swizzle(0,1,2);                   \
585                         const Vec3      dstColor        = sample.clampedBlendDstColor.swizzle(0,1,2);                   \
586                         const Vec3&     bias            = sample.blendSrcFactorRGB;                                                             \
587                         const float     p0                      = sample.blendSrcFactorA;                                                               \
588                                                                                                                                                                                         \
589                         sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;                                                               \
590                 }                                                                                                                                                                       \
591         }
592
593         // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
594         // \note clampedBlend*Color contains clamped & unpremultiplied colors
595         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
596         {
597                 if (m_sampleRegister[regSampleNdx].isAlive)
598                 {
599                         SampleData&     sample          = m_sampleRegister[regSampleNdx];
600                         const Vec4&     srcColor        = sample.clampedBlendSrcColor;
601                         const Vec4&     dstColor        = sample.clampedBlendDstColor;
602                         const float     srcA            = srcColor.w();
603                         const float     dstA            = dstColor.w();
604                         const float     p0                      = srcA*dstA;
605                         const float p1                  = srcA*(1.0f-dstA);
606                         const float p2                  = dstA*(1.0f-srcA);
607                         const Vec3      bias            (srcColor[0]*p1 + dstColor[0]*p2,
608                                                                          srcColor[1]*p1 + dstColor[1]*p2,
609                                                                          srcColor[2]*p1 + dstColor[2]*p2);
610
611                         sample.blendSrcFactorRGB        = bias;
612                         sample.blendSrcFactorA          = p0;
613                         sample.blendedA                         = p0 + p1 + p2;
614                 }
615         }
616
617         switch (equation)
618         {
619                 case BLENDEQUATION_ADVANCED_MULTIPLY:           SAMPLE_REGISTER_ADV_BLEND(multiply);                                                                    break;
620                 case BLENDEQUATION_ADVANCED_SCREEN:                     SAMPLE_REGISTER_ADV_BLEND(screen);                                                                              break;
621                 case BLENDEQUATION_ADVANCED_OVERLAY:            SAMPLE_REGISTER_ADV_BLEND(overlay);                                                                             break;
622                 case BLENDEQUATION_ADVANCED_DARKEN:                     SAMPLE_REGISTER_ADV_BLEND(darken);                                                                              break;
623                 case BLENDEQUATION_ADVANCED_LIGHTEN:            SAMPLE_REGISTER_ADV_BLEND(lighten);                                                                             break;
624                 case BLENDEQUATION_ADVANCED_COLORDODGE:         SAMPLE_REGISTER_ADV_BLEND(colordodge);                                                                  break;
625                 case BLENDEQUATION_ADVANCED_COLORBURN:          SAMPLE_REGISTER_ADV_BLEND(colorburn);                                                                   break;
626                 case BLENDEQUATION_ADVANCED_HARDLIGHT:          SAMPLE_REGISTER_ADV_BLEND(hardlight);                                                                   break;
627                 case BLENDEQUATION_ADVANCED_SOFTLIGHT:          SAMPLE_REGISTER_ADV_BLEND(softlight);                                                                   break;
628                 case BLENDEQUATION_ADVANCED_DIFFERENCE:         SAMPLE_REGISTER_ADV_BLEND(difference);                                                                  break;
629                 case BLENDEQUATION_ADVANCED_EXCLUSION:          SAMPLE_REGISTER_ADV_BLEND(exclusion);                                                                   break;
630                 case BLENDEQUATION_ADVANCED_HSL_HUE:            SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor)); break;
631                 case BLENDEQUATION_ADVANCED_HSL_SATURATION:     SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor)); break;
632                 case BLENDEQUATION_ADVANCED_HSL_COLOR:          SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));                              break;
633                 case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:     SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));                              break;
634                 default:
635                         DE_ASSERT(false);
636         }
637
638 #undef SAMPLE_REGISTER_ADV_BLEND
639 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
640 }
641
642 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
643 {
644         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
645         {
646                 if (m_sampleRegister[regSampleNdx].isAlive)
647                 {
648                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
649                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
650                         Vec4                            combinedColor;
651
652                         combinedColor.xyz()     = m_sampleRegister[regSampleNdx].blendedRGB;
653                         combinedColor.w()       = m_sampleRegister[regSampleNdx].blendedA;
654
655                         if (isSRGB)
656                                 combinedColor = tcu::linearToSRGB(combinedColor);
657
658                         colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
659                 }
660         }
661 }
662
663 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
664 {
665         const int               fragStride      = 4;
666         const int               xStride         = colorBuffer.getRowPitch();
667         const int               yStride         = colorBuffer.getSlicePitch();
668         deUint8* const  basePtr         = (deUint8*)colorBuffer.getDataPtr();
669
670         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
671         {
672                 if (m_sampleRegister[regSampleNdx].isAlive)
673                 {
674                         const int                       fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
675                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
676                         deUint8*                        dstPtr                  = basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
677
678                         dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
679                         dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
680                         dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
681                         dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
682                 }
683         }
684 }
685
686 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
687 {
688         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
689         {
690                 if (m_sampleRegister[regSampleNdx].isAlive)
691                 {
692                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
693                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
694                         Vec4                            originalColor   = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
695                         Vec4                            newColor;
696
697                         newColor.xyz()  = m_sampleRegister[regSampleNdx].blendedRGB;
698                         newColor.w()    = m_sampleRegister[regSampleNdx].blendedA;
699
700                         if (isSRGB)
701                                 newColor = tcu::linearToSRGB(newColor);
702
703                         newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
704
705                         colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
706                 }
707         }
708 }
709
710 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
711 {
712         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
713         {
714                 if (m_sampleRegister[regSampleNdx].isAlive)
715                 {
716                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
717                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
718                         const IVec4                     originalValue   = colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
719
720                         colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
721                 }
722         }
723 }
724
725 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
726 {
727         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
728         {
729                 if (m_sampleRegister[regSampleNdx].isAlive)
730                 {
731                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
732                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
733                         const UVec4                     originalValue   = colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
734
735                         colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
736                 }
737         }
738 }
739
740 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&         msColorBuffer,
741                                                                 const rr::MultisamplePixelBufferAccess&         msDepthBuffer,
742                                                                 const rr::MultisamplePixelBufferAccess&         msStencilBuffer,
743                                                                 const Fragment*                                                         inputFragments,
744                                                                 int                                                                                     numFragments,
745                                                                 FaceType                                                                        fragmentFacing,
746                                                                 const FragmentOperationState&                           state)
747 {
748         DE_ASSERT(fragmentFacing < FACETYPE_LAST);
749         DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
750
751         const tcu::PixelBufferAccess&   colorBuffer                     = msColorBuffer.raw();
752         const tcu::PixelBufferAccess&   depthBuffer                     = msDepthBuffer.raw();
753         const tcu::PixelBufferAccess&   stencilBuffer           = msStencilBuffer.raw();
754
755         bool                                                    hasDepth                        = depthBuffer.getWidth() > 0    && depthBuffer.getHeight() > 0          && depthBuffer.getDepth() > 0;
756         bool                                                    hasStencil                      = stencilBuffer.getWidth() > 0  && stencilBuffer.getHeight() > 0        && stencilBuffer.getDepth() > 0;
757         bool                                                    doDepthTest                     = hasDepth && state.depthTestEnabled;
758         bool                                                    doStencilTest           = hasStencil && state.stencilTestEnabled;
759
760         tcu::TextureChannelClass                colorbufferClass        = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
761         rr::GenericVecType                              fragmentDataType        = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
762
763         DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())       && (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
764         DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())     && (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
765         DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())       && (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
766
767         // Combined formats must be separated beforehand
768         DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
769         DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
770
771         int                                             numSamplesPerFragment           = colorBuffer.getWidth();
772         int                                             totalNumSamples                         = numFragments*numSamplesPerFragment;
773         int                                             numSampleGroups                         = (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
774         const StencilState&             stencilState                            = state.stencilStates[fragmentFacing];
775         Vec4                                    colorMaskFactor                         (state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
776         Vec4                                    colorMaskNegationFactor         (state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
777         bool                                    sRGBTarget                                      = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
778
779         DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
780
781         // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
782         // the per-sample operations for one group at a time.
783
784         for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
785         {
786                 // The index of the fragment of the sample at the beginning of m_sampleRegisters.
787                 int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
788
789                 // Initialize sample data in the sample register.
790
791                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
792                 {
793                         int fragNdx                     = groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
794                         int fragSampleNdx       = regSampleNdx % numSamplesPerFragment;
795
796                         if (fragNdx < numFragments)
797                         {
798                                 m_sampleRegister[regSampleNdx].isAlive          = (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
799                                 m_sampleRegister[regSampleNdx].depthPassed      = true; // \note This will stay true if depth test is disabled.
800                         }
801                         else
802                                 m_sampleRegister[regSampleNdx].isAlive = false;
803                 }
804
805                 // Scissor test.
806
807                 if (state.scissorTestEnabled)
808                         executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
809
810                 // Stencil test.
811
812                 if (doStencilTest)
813                 {
814                         executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
815                         executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
816                 }
817
818                 // Depth test.
819                 // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
820
821                 if (doDepthTest)
822                 {
823                         executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
824
825                         if (state.depthMask)
826                                 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
827                 }
828
829                 // Do dpFail and dpPass stencil writes.
830
831                 if (doStencilTest)
832                         executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
833
834                 // Kill the samples that failed depth test.
835
836                 if (doDepthTest)
837                 {
838                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
839                                 m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
840                 }
841
842                 // Paint fragments to target
843
844                 switch (fragmentDataType)
845                 {
846                         case rr::GENERICVECTYPE_FLOAT:
847                         {
848                                 // Select min/max clamping values for blending factors and operands
849                                 Vec4 minClampValue;
850                                 Vec4 maxClampValue;
851
852                                 if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
853                                 {
854                                         minClampValue = Vec4(0.0f);
855                                         maxClampValue = Vec4(1.0f);
856                                 }
857                                 else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
858                                 {
859                                         minClampValue = Vec4(-1.0f);
860                                         maxClampValue = Vec4(1.0f);
861                                 }
862                                 else
863                                 {
864                                         // No clamping
865                                         minClampValue = Vec4(-std::numeric_limits<float>::infinity());
866                                         maxClampValue = Vec4(std::numeric_limits<float>::infinity());
867                                 }
868
869                                 // Blend calculation - only if using blend.
870                                 if (state.blendMode == BLENDMODE_STANDARD)
871                                 {
872                                         // Put dst color to register, doing srgb-to-linear conversion if needed.
873                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
874                                         {
875                                                 if (m_sampleRegister[regSampleNdx].isAlive)
876                                                 {
877                                                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
878                                                         const Fragment&         frag                    = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
879                                                         Vec4                            dstColor                = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
880
881                                                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor             = clamp(frag.value.get<float>(), minClampValue, maxClampValue);
882                                                         m_sampleRegister[regSampleNdx].clampedBlendSrc1Color    = clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
883                                                         m_sampleRegister[regSampleNdx].clampedBlendDstColor             = clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
884                                                 }
885                                         }
886
887                                         // Calculate blend factors to register.
888                                         executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
889                                         executeBlendFactorComputeA(state.blendColor, state.blendAState);
890
891                                         // Compute blended color.
892                                         executeBlend(state.blendRGBState, state.blendAState);
893                                 }
894                                 else if (state.blendMode == BLENDMODE_ADVANCED)
895                                 {
896                                         // Unpremultiply colors for blending, and do sRGB->linear if necessary
897                                         // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
898                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
899                                         {
900                                                 if (m_sampleRegister[regSampleNdx].isAlive)
901                                                 {
902                                                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
903                                                         const Fragment&         frag                    = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
904                                                         const Vec4                      srcColor                = frag.value.get<float>();
905                                                         const Vec4                      dstColor                = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
906
907                                                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor             = unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
908                                                         m_sampleRegister[regSampleNdx].clampedBlendDstColor             = unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
909                                                 }
910                                         }
911
912                                         executeAdvancedBlend(state.blendEquationAdvaced);
913                                 }
914                                 else
915                                 {
916                                         // Not using blend - just put values to register as-is.
917                                         DE_ASSERT(state.blendMode == BLENDMODE_NONE);
918
919                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
920                                         {
921                                                 if (m_sampleRegister[regSampleNdx].isAlive)
922                                                 {
923                                                         const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
924
925                                                         m_sampleRegister[regSampleNdx].blendedRGB       = frag.value.get<float>().xyz();
926                                                         m_sampleRegister[regSampleNdx].blendedA         = frag.value.get<float>().w();
927                                                 }
928                                         }
929                                 }
930
931                                 // Clamp result values in sample register
932                                 if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
933                                 {
934                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
935                                         {
936                                                 if (m_sampleRegister[regSampleNdx].isAlive)
937                                                 {
938                                                         m_sampleRegister[regSampleNdx].blendedRGB       = clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
939                                                         m_sampleRegister[regSampleNdx].blendedA         = clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
940                                                 }
941                                         }
942                                 }
943
944                                 // Finally, write the colors to the color buffer.
945
946                                 if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
947                                 {
948                                         if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
949                                                 executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
950                                         else
951                                                 executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
952                                 }
953                                 else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
954                                         executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
955                                 break;
956                         }
957                         case rr::GENERICVECTYPE_INT32:
958                                 // Write fragments
959                                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
960                                 {
961                                         if (m_sampleRegister[regSampleNdx].isAlive)
962                                         {
963                                                 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
964
965                                                 m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
966                                         }
967                                 }
968
969                                 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
970                                         executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
971                                 break;
972
973                         case rr::GENERICVECTYPE_UINT32:
974                                 // Write fragments
975                                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
976                                 {
977                                         if (m_sampleRegister[regSampleNdx].isAlive)
978                                         {
979                                                 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
980
981                                                 m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
982                                         }
983                                 }
984
985                                 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
986                                         executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
987                                 break;
988
989                         default:
990                                 DE_ASSERT(DE_FALSE);
991                 }
992         }
993 }
994
995 } // rr