DO NOT MERGE Refresh GLES 3.1 must-pass XML am: d8e85a9be9 -s ours am: ba3d0b4eb3...
[platform/upstream/VK-GL-CTS.git] / framework / referencerenderer / rrFragmentOperations.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference implementation for per-fragment operations.
22  *//*--------------------------------------------------------------------*/
23
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27
28 using tcu::IVec2;
29 using tcu::Vec3;
30 using tcu::Vec4;
31 using tcu::IVec4;
32 using tcu::UVec4;
33 using tcu::min;
34 using tcu::max;
35 using tcu::clamp;
36 using de::min;
37 using de::max;
38 using de::clamp;
39
40 namespace rr
41 {
42
43 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
44 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
45 {
46         return (oldValue & ~mask) | (newValue & mask);
47 }
48
49 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
50 {
51         return de::inBounds(point.x(), rect.left,               rect.left + rect.width) &&
52                    de::inBounds(point.y(), rect.bottom,         rect.bottom + rect.height);
53 }
54
55 static inline Vec4 unpremultiply (const Vec4& v)
56 {
57         if (v.w() > 0.0f)
58                 return Vec4(v.x()/v.w(), v.y()/v.w(), v.z()/v.w(), v.w());
59         else
60         {
61                 DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
62                 return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
63         }
64 }
65
66 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const Vec4& v,      const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                                }
67 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const IVec4& v,     const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                                }
68 void clearMultisampleColorBuffer        (const tcu::PixelBufferAccess& dst, const UVec4& v,     const WindowRectangle& r)       { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());    }
69 void clearMultisampleDepthBuffer        (const tcu::PixelBufferAccess& dst, float v,            const WindowRectangle& r)       { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                   }
70 void clearMultisampleStencilBuffer      (const tcu::PixelBufferAccess& dst, int v,                      const WindowRectangle& r)       { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);                 }
71
72 FragmentProcessor::FragmentProcessor (void)
73         : m_sampleRegister()
74 {
75 }
76
77 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
78 {
79         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
80         {
81                 if (m_sampleRegister[regSampleNdx].isAlive)
82                 {
83                         int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
84
85                         if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
86                                 m_sampleRegister[regSampleNdx].isAlive = false;
87                 }
88         }
89 }
90
91 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
92 {
93 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)                                                                                                                                                                     \
94         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                 \
95         {                                                                                                                                                                                                                                                                               \
96                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                     \
97                 {                                                                                                                                                                                                                                                                       \
98                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                 \
99                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                   \
100                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
101                         int                                     maskedRef                       = stencilState.compMask & clampedStencilRef;                                                                                    \
102                         int                                     maskedBuf                       = stencilState.compMask & stencilBufferValue;                                                                                   \
103                         DE_UNREF(maskedRef);                                                                                                                                                                                                                    \
104                         DE_UNREF(maskedBuf);                                                                                                                                                                                                                    \
105                                                                                                                                                                                                                                                                                         \
106                         m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);                                                                                                                    \
107                 }                                                                                                                                                                                                                                                                       \
108         }
109
110         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
111
112         switch (stencilState.func)
113         {
114                 case TESTFUNC_NEVER:    SAMPLE_REGISTER_STENCIL_COMPARE(false)                                          break;
115                 case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_STENCIL_COMPARE(true)                                           break;
116                 case TESTFUNC_LESS:             SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)         break;
117                 case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)         break;
118                 case TESTFUNC_GREATER:  SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)         break;
119                 case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)         break;
120                 case TESTFUNC_EQUAL:    SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)         break;
121                 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)         break;
122                 default:
123                         DE_ASSERT(false);
124         }
125
126 #undef SAMPLE_REGISTER_STENCIL_COMPARE
127 }
128
129 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
130 {
131 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)                                                                                                                                                                                                                                                                         \
132         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                                                                                                 \
133         {                                                                                                                                                                                                                                                                                                                                                               \
134                 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)                                                                                                                                                            \
135                 {                                                                                                                                                                                                                                                                                                                                                       \
136                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                                                                                                 \
137                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                                                                                                   \
138                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                                                                                 \
139                                                                                                                                                                                                                                                                                                                                                                         \
140                         stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
141                         m_sampleRegister[regSampleNdx].isAlive = false;                                                                                                                                                                                                                                                 \
142                 }                                                                                                                                                                                                                                                                                                                                                       \
143         }
144
145         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
146
147         switch (stencilState.sFail)
148         {
149                 case STENCILOP_KEEP:            SAMPLE_REGISTER_SFAIL(stencilBufferValue)                                                                                               break;
150                 case STENCILOP_ZERO:            SAMPLE_REGISTER_SFAIL(0)                                                                                                                                break;
151                 case STENCILOP_REPLACE:         SAMPLE_REGISTER_SFAIL(clampedStencilRef)                                                                                                break;
152                 case STENCILOP_INCR:            SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))              break;
153                 case STENCILOP_DECR:            SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))              break;
154                 case STENCILOP_INCR_WRAP:       SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))                             break;
155                 case STENCILOP_DECR_WRAP:       SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))                             break;
156                 case STENCILOP_INVERT:          SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))                                break;
157                 default:
158                         DE_ASSERT(false);
159         }
160
161 #undef SAMPLE_REGISTER_SFAIL
162 }
163
164 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
165 {
166 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)                                                                                                                                                                             \
167         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                         \
168         {                                                                                                                                                                                                                                                                                       \
169                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                             \
170                 {                                                                                                                                                                                                                                                                               \
171                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                         \
172                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                           \
173                         float                           depthBufferValue        = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                     \
174                         float                           sampleDepthFloat        = frag.sampleDepths[fragSampleNdx];                                                                                                                     \
175                         float                           sampleDepth                     = de::clamp(sampleDepthFloat, 0.0f, 1.0f);                                                                                                      \
176                                                                                                                                                                                                                                                                                                 \
177                         m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                                                                                                                                      \
178                                                                                                                                                                                                                                                                                                 \
179                         DE_UNREF(depthBufferValue);                                                                                                                                                                                                                     \
180                         DE_UNREF(sampleDepth);                                                                                                                                                                                                                          \
181                 }                                                                                                                                                                                                                                                                               \
182         }
183
184 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)                                                                                                                                                                    \
185         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                         \
186         {                                                                                                                                                                                                                                                                                       \
187                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                                                                                             \
188                 {                                                                                                                                                                                                                                                                               \
189                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                         \
190                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                           \
191                         deUint32                        depthBufferValue        = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();        \
192                         float                           sampleDepthFloat        = frag.sampleDepths[fragSampleNdx];                                                                                                                     \
193                                                                                                                                                                                                                                                                                                 \
194                         /* Convert input float to target buffer format for comparison */                                                                                                                                        \
195                                                                                                                                                                                                                                                                                                 \
196                         deUint32 buffer[2];                                                                                                                                                                                                                                     \
197                                                                                                                                                                                                                                                                                                 \
198                         DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());                                                                                                            \
199                                                                                                                                                                                                                                                                                                 \
200                         tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);                                                                                                                       \
201                         access.setPixDepth(sampleDepthFloat, 0, 0, 0);                                                                                                                                                                          \
202                         deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();                                                                                                                                                        \
203                                                                                                                                                                                                                                                                                                 \
204                         m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                                                                                                                                      \
205                                                                                                                                                                                                                                                                                                 \
206                         DE_UNREF(depthBufferValue);                                                                                                                                                                                                                     \
207                         DE_UNREF(sampleDepth);                                                                                                                                                                                                                          \
208                 }                                                                                                                                                                                                                                                                               \
209         }
210
211         if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
212         {
213
214                 switch (depthFunc)
215                 {
216                         case TESTFUNC_NEVER:    SAMPLE_REGISTER_DEPTH_COMPARE_F(false)                                                  break;
217                         case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_DEPTH_COMPARE_F(true)                                                           break;
218                         case TESTFUNC_LESS:             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)        break;
219                         case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)        break;
220                         case TESTFUNC_GREATER:  SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)        break;
221                         case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)        break;
222                         case TESTFUNC_EQUAL:    SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)        break;
223                         case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)        break;
224                         default:
225                                 DE_ASSERT(false);
226                 }
227
228         }
229         else
230         {
231                 switch (depthFunc)
232                 {
233                         case TESTFUNC_NEVER:    SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)                                                 break;
234                         case TESTFUNC_ALWAYS:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)                                                          break;
235                         case TESTFUNC_LESS:             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)       break;
236                         case TESTFUNC_LEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)       break;
237                         case TESTFUNC_GREATER:  SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)       break;
238                         case TESTFUNC_GEQUAL:   SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)       break;
239                         case TESTFUNC_EQUAL:    SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)       break;
240                         case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)       break;
241                         default:
242                                 DE_ASSERT(false);
243                 }
244         }
245
246 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
247 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
248 }
249
250 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
251 {
252         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
253         {
254                 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
255                 {
256                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
257                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
258                         const float                     clampedDepth    = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
259
260                         depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
261                 }
262         }
263 }
264
265 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
266 {
267 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)                                                                                                                                                                                                                                 \
268         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                                                                                                                                                         \
269         {                                                                                                                                                                                                                                                                                                                                                       \
270                 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))                                                                                                                                                                                                                              \
271                 {                                                                                                                                                                                                                                                                                                                                               \
272                         int                                     fragSampleNdx           = regSampleNdx % numSamplesPerFragment;                                                                                                                                                                         \
273                         const Fragment&         frag                            = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];                                                                                                           \
274                         int                                     stencilBufferValue      = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                                                                         \
275                                                                                                                                                                                                                                                                                                                                                                 \
276                         stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());       \
277                 }                                                                                                                                                                                                                                                                                                                                               \
278         }
279
280 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)                                                                                                                                                                                                                     \
281                 switch (stencilState.OP_NAME)                                                                                                                                                                                                                                           \
282                 {                                                                                                                                                                                                                                                                                                       \
283                         case STENCILOP_KEEP:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)                                                                                         break;  \
284                         case STENCILOP_ZERO:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)                                                                                                                          break;  \
285                         case STENCILOP_REPLACE:         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)                                                                                          break;  \
286                         case STENCILOP_INCR:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))        break;  \
287                         case STENCILOP_DECR:            SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))        break;  \
288                         case STENCILOP_INCR_WRAP:       SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))                       break;  \
289                         case STENCILOP_DECR_WRAP:       SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))                       break;  \
290                         case STENCILOP_INVERT:          SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))                          break;  \
291                         default:                                                                                                                                                                                                                                                                                \
292                                 DE_ASSERT(false);                                                                                                                                                                                                                                                       \
293                 }
294
295         int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
296
297         SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
298         SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
299
300 #undef SWITCH_DPFAIL_OR_DPPASS
301 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
302 }
303
304 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
305 {
306 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                                                                                    \
307         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                                         \
308         {                                                                                                                                                                                                                       \
309                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                                             \
310                 {                                                                                                                                                                                                               \
311                         const Vec4& src         = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;                                                  \
312                         const Vec4& src1        = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;                                                 \
313                         const Vec4& dst         = m_sampleRegister[regSampleNdx].clampedBlendDstColor;                                                  \
314                         DE_UNREF(src);                                                                                                                                                                          \
315                         DE_UNREF(src1);                                                                                                                                                                         \
316                         DE_UNREF(dst);                                                                                                                                                                          \
317                                                                                                                                                                                                                                 \
318                         m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), Vec3(0.0f), Vec3(1.0f));        \
319                 }                                                                                                                                                                                                               \
320         }
321
322 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)                                                                                                                                                                    \
323         switch (blendRGBState.FUNC_NAME)                                                                                                                                                                                                                        \
324         {                                                                                                                                                                                                                                                                                       \
325                 case BLENDFUNC_ZERO:                                            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))                                                           break;  \
326                 case BLENDFUNC_ONE:                                                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))                                                           break;  \
327                 case BLENDFUNC_SRC_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))                                           break;  \
328                 case BLENDFUNC_ONE_MINUS_SRC_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))                      break;  \
329                 case BLENDFUNC_DST_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))                                           break;  \
330                 case BLENDFUNC_ONE_MINUS_DST_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))                      break;  \
331                 case BLENDFUNC_SRC_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))                                                        break;  \
332                 case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))                                         break;  \
333                 case BLENDFUNC_DST_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))                                                        break;  \
334                 case BLENDFUNC_ONE_MINUS_DST_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))                                         break;  \
335                 case BLENDFUNC_CONSTANT_COLOR:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))                            break;  \
336                 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))       break;  \
337                 case BLENDFUNC_CONSTANT_ALPHA:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))                                         break;  \
338                 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))                          break;  \
339                 case BLENDFUNC_SRC_ALPHA_SATURATE:                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))       break;  \
340                 case BLENDFUNC_SRC1_COLOR:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))                                          break;  \
341                 case BLENDFUNC_ONE_MINUS_SRC1_COLOR:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))                     break;  \
342                 case BLENDFUNC_SRC1_ALPHA:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))                                                       break;  \
343                 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))                                        break;  \
344                 default:                                                                                                                                                                                                                                                                \
345                         DE_ASSERT(false);                                                                                                                                                                                                                                       \
346         }
347
348         SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
349         SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
350
351 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
352 #undef SAMPLE_REGISTER_BLEND_FACTOR
353 }
354
355 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
356 {
357 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                                                            \
358         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                 \
359         {                                                                                                                                                                                               \
360                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                                     \
361                 {                                                                                                                                                                                       \
362                         const Vec4& src         = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;                          \
363                         const Vec4& src1        = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;                         \
364                         const Vec4& dst         = m_sampleRegister[regSampleNdx].clampedBlendDstColor;                          \
365                         DE_UNREF(src);                                                                                                                                                  \
366                         DE_UNREF(src1);                                                                                                                                                 \
367                         DE_UNREF(dst);                                                                                                                                                  \
368                                                                                                                                                                                                         \
369                         m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), 0.0f, 1.0f);    \
370                 }                                                                                                                                                                                       \
371         }
372
373 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)                                                                                                                                              \
374         switch (blendAState.FUNC_NAME)                                                                                                                                                                                          \
375         {                                                                                                                                                                                                                                                       \
376                 case BLENDFUNC_ZERO:                                            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)                                         break;  \
377                 case BLENDFUNC_ONE:                                                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)                                         break;  \
378                 case BLENDFUNC_SRC_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())                                      break;  \
379                 case BLENDFUNC_ONE_MINUS_SRC_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())                       break;  \
380                 case BLENDFUNC_DST_COLOR:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())                                      break;  \
381                 case BLENDFUNC_ONE_MINUS_DST_COLOR:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())                       break;  \
382                 case BLENDFUNC_SRC_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())                                      break;  \
383                 case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())                       break;  \
384                 case BLENDFUNC_DST_ALPHA:                                       SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())                                      break;  \
385                 case BLENDFUNC_ONE_MINUS_DST_ALPHA:                     SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())                       break;  \
386                 case BLENDFUNC_CONSTANT_COLOR:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())                       break;  \
387                 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())        break;  \
388                 case BLENDFUNC_CONSTANT_ALPHA:                          SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())                       break;  \
389                 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:        SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())        break;  \
390                 case BLENDFUNC_SRC_ALPHA_SATURATE:                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)                                         break;  \
391                 case BLENDFUNC_SRC1_COLOR:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())                                     break;  \
392                 case BLENDFUNC_ONE_MINUS_SRC1_COLOR:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())                      break;  \
393                 case BLENDFUNC_SRC1_ALPHA:                                      SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())                                     break;  \
394                 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:            SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())                      break;  \
395                 default:                                                                                                                                                                                                                                \
396                         DE_ASSERT(false);                                                                                                                                                                                                       \
397         }
398
399         SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
400         SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
401
402 #undef SWITCH_SRC_OR_DST_FACTOR_A
403 #undef SAMPLE_REGISTER_BLEND_FACTOR
404 }
405
406 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
407 {
408 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)                                             \
409         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)         \
410         {                                                                                                                                                                       \
411                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                             \
412                 {                                                                                                                                                               \
413                         SampleData& sample              = m_sampleRegister[regSampleNdx];                                       \
414                         const Vec4& srcColor    = sample.clampedBlendSrcColor;                                          \
415                         const Vec4& dstColor    = sample.clampedBlendDstColor;                                          \
416                                                                                                                                                                                 \
417                         sample.COLOR_NAME = (COLOR_EXPRESSION);                                                                         \
418                 }                                                                                                                                                               \
419         }
420
421         switch (blendRGBState.equation)
422         {
423                 case BLENDEQUATION_ADD:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)  break;
424                 case BLENDEQUATION_SUBTRACT:                    SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)  break;
425                 case BLENDEQUATION_REVERSE_SUBTRACT:    SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)  break;
426                 case BLENDEQUATION_MIN:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))                                                                                                break;
427                 case BLENDEQUATION_MAX:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))                                                                                                break;
428                 default:
429                         DE_ASSERT(false);
430         }
431
432         switch (blendAState.equation)
433         {
434                 case BLENDEQUATION_ADD:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)      break;
435                 case BLENDEQUATION_SUBTRACT:                    SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)      break;
436                 case BLENDEQUATION_REVERSE_SUBTRACT:    SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)      break;
437                 case BLENDEQUATION_MIN:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))                                                                                        break;
438                 case BLENDEQUATION_MAX:                                 SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))                                                                                        break;
439                 default:
440                         DE_ASSERT(false);
441         }
442 #undef SAMPLE_REGISTER_BLENDED_COLOR
443 }
444
445 namespace advblend
446 {
447
448 inline float    multiply        (float src, float dst) { return src*dst;                                        }
449 inline float    screen          (float src, float dst) { return src + dst - src*dst;            }
450 inline float    darken          (float src, float dst) { return de::min(src, dst);                      }
451 inline float    lighten         (float src, float dst) { return de::max(src, dst);                      }
452 inline float    difference      (float src, float dst) { return de::abs(dst-src);                       }
453 inline float    exclusion       (float src, float dst) { return src + dst - 2.0f*src*dst;       }
454
455 inline float overlay (float src, float dst)
456 {
457         if (dst <= 0.5f)
458                 return 2.0f*src*dst;
459         else
460                 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
461 }
462
463 inline float colordodge (float src, float dst)
464 {
465         if (dst <= 0.0f)
466                 return 0.0f;
467         else if (src < 1.0f)
468                 return de::min(1.0f, dst/(1.0f-src));
469         else
470                 return 1.0f;
471 }
472
473 inline float colorburn (float src, float dst)
474 {
475         if (dst >= 1.0f)
476                 return 1.0f;
477         else if (src > 0.0f)
478                 return 1.0f - de::min(1.0f, (1.0f-dst)/src);
479         else
480                 return 0.0f;
481 }
482
483 inline float hardlight (float src, float dst)
484 {
485         if (src <= 0.5f)
486                 return 2.0f*src*dst;
487         else
488                 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
489 }
490
491 inline float softlight (float src, float dst)
492 {
493         if (src <= 0.5f)
494                 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
495         else if (dst <= 0.25f)
496                 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
497         else
498                 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
499 }
500
501 inline float minComp (const Vec3& v)
502 {
503         return de::min(de::min(v.x(), v.y()), v.z());
504 }
505
506 inline float maxComp (const Vec3& v)
507 {
508         return de::max(de::max(v.x(), v.y()), v.z());
509 }
510
511 inline float luminosity (const Vec3& rgb)
512 {
513         return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
514 }
515
516 inline float saturation (const Vec3& rgb)
517 {
518         return maxComp(rgb) - minComp(rgb);
519 }
520
521 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
522 {
523         const float             lbase   = luminosity(cbase);
524         const float             llum    = luminosity(clum);
525         const float             ldiff   = llum - lbase;
526         const Vec3              color   = cbase + Vec3(ldiff);
527         const float             minC    = minComp(color);
528         const float             maxC    = maxComp(color);
529
530         if (minC < 0.0f)
531                 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
532         else if (maxC > 1.0f)
533                 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
534         else
535                 return color;
536 }
537
538 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
539 {
540         const float             minbase = minComp(cbase);
541         const float             sbase   = saturation(cbase);
542         const float             ssat    = saturation(csat);
543         Vec3                    color   = Vec3(0.0f);
544
545         if (sbase > 0.0f)
546                 color = (cbase - minbase) * ssat / sbase;
547         else
548                 color = color;
549
550         return setLum(color, clum);
551 }
552
553 } // advblend
554
555 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
556 {
557         using namespace advblend;
558
559 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)                                                                                        \
560         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                 \
561         {                                                                                                                                                                               \
562                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                     \
563                 {                                                                                                                                                                       \
564                         SampleData&     sample          = m_sampleRegister[regSampleNdx];                                               \
565                         const Vec4&     srcColor        = sample.clampedBlendSrcColor;                                                  \
566                         const Vec4&     dstColor        = sample.clampedBlendDstColor;                                                  \
567                         const Vec3&     bias            = sample.blendSrcFactorRGB;                                                             \
568                         const float     p0                      = sample.blendSrcFactorA;                                                               \
569                         const float     r                       = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \
570                         const float     g                       = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \
571                         const float     b                       = FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2]; \
572                                                                                                                                                                                         \
573                         sample.blendedRGB = Vec3(r, g, b);                                                                                              \
574                 }                                                                                                                                                                       \
575         }
576
577 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)                                                                         \
578         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                 \
579         {                                                                                                                                                                               \
580                 if (m_sampleRegister[regSampleNdx].isAlive)                                                                                     \
581                 {                                                                                                                                                                       \
582                         SampleData&     sample          = m_sampleRegister[regSampleNdx];                                               \
583                         const Vec3      srcColor        = sample.clampedBlendSrcColor.swizzle(0,1,2);                   \
584                         const Vec3      dstColor        = sample.clampedBlendDstColor.swizzle(0,1,2);                   \
585                         const Vec3&     bias            = sample.blendSrcFactorRGB;                                                             \
586                         const float     p0                      = sample.blendSrcFactorA;                                                               \
587                                                                                                                                                                                         \
588                         sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;                                                               \
589                 }                                                                                                                                                                       \
590         }
591
592         // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
593         // \note clampedBlend*Color contains clamped & unpremultiplied colors
594         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
595         {
596                 if (m_sampleRegister[regSampleNdx].isAlive)
597                 {
598                         SampleData&     sample          = m_sampleRegister[regSampleNdx];
599                         const Vec4&     srcColor        = sample.clampedBlendSrcColor;
600                         const Vec4&     dstColor        = sample.clampedBlendDstColor;
601                         const float     srcA            = srcColor.w();
602                         const float     dstA            = dstColor.w();
603                         const float     p0                      = srcA*dstA;
604                         const float p1                  = srcA*(1.0f-dstA);
605                         const float p2                  = dstA*(1.0f-srcA);
606                         const Vec3      bias            (srcColor[0]*p1 + dstColor[0]*p2,
607                                                                          srcColor[1]*p1 + dstColor[1]*p2,
608                                                                          srcColor[2]*p1 + dstColor[2]*p2);
609
610                         sample.blendSrcFactorRGB        = bias;
611                         sample.blendSrcFactorA          = p0;
612                         sample.blendedA                         = p0 + p1 + p2;
613                 }
614         }
615
616         switch (equation)
617         {
618                 case BLENDEQUATION_ADVANCED_MULTIPLY:           SAMPLE_REGISTER_ADV_BLEND(multiply);                                                                    break;
619                 case BLENDEQUATION_ADVANCED_SCREEN:                     SAMPLE_REGISTER_ADV_BLEND(screen);                                                                              break;
620                 case BLENDEQUATION_ADVANCED_OVERLAY:            SAMPLE_REGISTER_ADV_BLEND(overlay);                                                                             break;
621                 case BLENDEQUATION_ADVANCED_DARKEN:                     SAMPLE_REGISTER_ADV_BLEND(darken);                                                                              break;
622                 case BLENDEQUATION_ADVANCED_LIGHTEN:            SAMPLE_REGISTER_ADV_BLEND(lighten);                                                                             break;
623                 case BLENDEQUATION_ADVANCED_COLORDODGE:         SAMPLE_REGISTER_ADV_BLEND(colordodge);                                                                  break;
624                 case BLENDEQUATION_ADVANCED_COLORBURN:          SAMPLE_REGISTER_ADV_BLEND(colorburn);                                                                   break;
625                 case BLENDEQUATION_ADVANCED_HARDLIGHT:          SAMPLE_REGISTER_ADV_BLEND(hardlight);                                                                   break;
626                 case BLENDEQUATION_ADVANCED_SOFTLIGHT:          SAMPLE_REGISTER_ADV_BLEND(softlight);                                                                   break;
627                 case BLENDEQUATION_ADVANCED_DIFFERENCE:         SAMPLE_REGISTER_ADV_BLEND(difference);                                                                  break;
628                 case BLENDEQUATION_ADVANCED_EXCLUSION:          SAMPLE_REGISTER_ADV_BLEND(exclusion);                                                                   break;
629                 case BLENDEQUATION_ADVANCED_HSL_HUE:            SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor)); break;
630                 case BLENDEQUATION_ADVANCED_HSL_SATURATION:     SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor)); break;
631                 case BLENDEQUATION_ADVANCED_HSL_COLOR:          SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));                              break;
632                 case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:     SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));                              break;
633                 default:
634                         DE_ASSERT(false);
635         }
636
637 #undef SAMPLE_REGISTER_ADV_BLEND
638 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
639 }
640
641 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
642 {
643         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
644         {
645                 if (m_sampleRegister[regSampleNdx].isAlive)
646                 {
647                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
648                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
649                         Vec4                            combinedColor;
650
651                         combinedColor.xyz()     = m_sampleRegister[regSampleNdx].blendedRGB;
652                         combinedColor.w()       = m_sampleRegister[regSampleNdx].blendedA;
653
654                         if (isSRGB)
655                                 combinedColor = tcu::linearToSRGB(combinedColor);
656
657                         colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
658                 }
659         }
660 }
661
662 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
663 {
664         const int               fragStride      = 4;
665         const int               xStride         = colorBuffer.getRowPitch();
666         const int               yStride         = colorBuffer.getSlicePitch();
667         deUint8* const  basePtr         = (deUint8*)colorBuffer.getDataPtr();
668
669         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
670         {
671                 if (m_sampleRegister[regSampleNdx].isAlive)
672                 {
673                         const int                       fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
674                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
675                         deUint8*                        dstPtr                  = basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
676
677                         dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
678                         dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
679                         dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
680                         dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
681                 }
682         }
683 }
684
685 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
686 {
687         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
688         {
689                 if (m_sampleRegister[regSampleNdx].isAlive)
690                 {
691                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
692                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
693                         Vec4                            originalColor   = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
694                         Vec4                            newColor;
695
696                         newColor.xyz()  = m_sampleRegister[regSampleNdx].blendedRGB;
697                         newColor.w()    = m_sampleRegister[regSampleNdx].blendedA;
698
699                         if (isSRGB)
700                                 newColor = tcu::linearToSRGB(newColor);
701
702                         newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
703
704                         colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
705                 }
706         }
707 }
708
709 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
710 {
711         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
712         {
713                 if (m_sampleRegister[regSampleNdx].isAlive)
714                 {
715                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
716                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
717                         const IVec4                     originalValue   = colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
718
719                         colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
720                 }
721         }
722 }
723
724 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
725 {
726         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727         {
728                 if (m_sampleRegister[regSampleNdx].isAlive)
729                 {
730                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
731                         const Fragment&         frag                    = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732                         const UVec4                     originalValue   = colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
733
734                         colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
735                 }
736         }
737 }
738
739 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&         msColorBuffer,
740                                                                 const rr::MultisamplePixelBufferAccess&         msDepthBuffer,
741                                                                 const rr::MultisamplePixelBufferAccess&         msStencilBuffer,
742                                                                 const Fragment*                                                         inputFragments,
743                                                                 int                                                                                     numFragments,
744                                                                 FaceType                                                                        fragmentFacing,
745                                                                 const FragmentOperationState&                           state)
746 {
747         DE_ASSERT(fragmentFacing < FACETYPE_LAST);
748         DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
749
750         const tcu::PixelBufferAccess&   colorBuffer                     = msColorBuffer.raw();
751         const tcu::PixelBufferAccess&   depthBuffer                     = msDepthBuffer.raw();
752         const tcu::PixelBufferAccess&   stencilBuffer           = msStencilBuffer.raw();
753
754         bool                                                    hasDepth                        = depthBuffer.getWidth() > 0    && depthBuffer.getHeight() > 0          && depthBuffer.getDepth() > 0;
755         bool                                                    hasStencil                      = stencilBuffer.getWidth() > 0  && stencilBuffer.getHeight() > 0        && stencilBuffer.getDepth() > 0;
756         bool                                                    doDepthTest                     = hasDepth && state.depthTestEnabled;
757         bool                                                    doStencilTest           = hasStencil && state.stencilTestEnabled;
758
759         tcu::TextureChannelClass                colorbufferClass        = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
760         rr::GenericVecType                              fragmentDataType        = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
761
762         DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())       && (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
763         DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())     && (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
764         DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())       && (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
765
766         // Combined formats must be separated beforehand
767         DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
768         DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
769
770         int                                             numSamplesPerFragment           = colorBuffer.getWidth();
771         int                                             totalNumSamples                         = numFragments*numSamplesPerFragment;
772         int                                             numSampleGroups                         = (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
773         const StencilState&             stencilState                            = state.stencilStates[fragmentFacing];
774         Vec4                                    colorMaskFactor                         (state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
775         Vec4                                    colorMaskNegationFactor         (state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
776         bool                                    sRGBTarget                                      = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
777
778         DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
779
780         // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
781         // the per-sample operations for one group at a time.
782
783         for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
784         {
785                 // The index of the fragment of the sample at the beginning of m_sampleRegisters.
786                 int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
787
788                 // Initialize sample data in the sample register.
789
790                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
791                 {
792                         int fragNdx                     = groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
793                         int fragSampleNdx       = regSampleNdx % numSamplesPerFragment;
794
795                         if (fragNdx < numFragments)
796                         {
797                                 m_sampleRegister[regSampleNdx].isAlive          = (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
798                                 m_sampleRegister[regSampleNdx].depthPassed      = true; // \note This will stay true if depth test is disabled.
799                         }
800                         else
801                                 m_sampleRegister[regSampleNdx].isAlive = false;
802                 }
803
804                 // Scissor test.
805
806                 if (state.scissorTestEnabled)
807                         executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
808
809                 // Stencil test.
810
811                 if (doStencilTest)
812                 {
813                         executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
814                         executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
815                 }
816
817                 // Depth test.
818                 // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
819
820                 if (doDepthTest)
821                 {
822                         executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
823
824                         if (state.depthMask)
825                                 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
826                 }
827
828                 // Do dpFail and dpPass stencil writes.
829
830                 if (doStencilTest)
831                         executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
832
833                 // Kill the samples that failed depth test.
834
835                 if (doDepthTest)
836                 {
837                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
838                                 m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
839                 }
840
841                 // Paint fragments to target
842
843                 switch (fragmentDataType)
844                 {
845                         case rr::GENERICVECTYPE_FLOAT:
846                                 // Blend calculation - only if using blend.
847                                 if (state.blendMode == BLENDMODE_STANDARD)
848                                 {
849                                         // Put dst color to register, doing srgb-to-linear conversion if needed.
850                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
851                                         {
852                                                 if (m_sampleRegister[regSampleNdx].isAlive)
853                                                 {
854                                                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
855                                                         const Fragment&         frag                    = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
856                                                         Vec4                            dstColor                = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
857
858                                                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor             = clamp(frag.value.get<float>(), Vec4(0.0f), Vec4(1.0f));
859                                                         m_sampleRegister[regSampleNdx].clampedBlendSrc1Color    = clamp(frag.value1.get<float>(), Vec4(0.0f), Vec4(1.0f));
860                                                         m_sampleRegister[regSampleNdx].clampedBlendDstColor             = clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f));
861                                                 }
862                                         }
863
864                                         // Calculate blend factors to register.
865                                         executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
866                                         executeBlendFactorComputeA(state.blendColor, state.blendAState);
867
868                                         // Compute blended color.
869                                         executeBlend(state.blendRGBState, state.blendAState);
870                                 }
871                                 else if (state.blendMode == BLENDMODE_ADVANCED)
872                                 {
873                                         // Unpremultiply colors for blending, and do sRGB->linear if necessary
874                                         // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
875                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
876                                         {
877                                                 if (m_sampleRegister[regSampleNdx].isAlive)
878                                                 {
879                                                         int                                     fragSampleNdx   = regSampleNdx % numSamplesPerFragment;
880                                                         const Fragment&         frag                    = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
881                                                         const Vec4                      srcColor                = frag.value.get<float>();
882                                                         const Vec4                      dstColor                = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
883
884                                                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor             = unpremultiply(clamp(srcColor, Vec4(0.0f), Vec4(1.0f)));
885                                                         m_sampleRegister[regSampleNdx].clampedBlendDstColor             = unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f)));
886                                                 }
887                                         }
888
889                                         executeAdvancedBlend(state.blendEquationAdvaced);
890                                 }
891                                 else
892                                 {
893                                         // Not using blend - just put values to register as-is.
894                                         DE_ASSERT(state.blendMode == BLENDMODE_NONE);
895
896                                         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
897                                         {
898                                                 if (m_sampleRegister[regSampleNdx].isAlive)
899                                                 {
900                                                         const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
901
902                                                         m_sampleRegister[regSampleNdx].blendedRGB       = frag.value.get<float>().xyz();
903                                                         m_sampleRegister[regSampleNdx].blendedA         = frag.value.get<float>().w();
904                                                 }
905                                         }
906                                 }
907
908                                 // Finally, write the colors to the color buffer.
909
910                                 if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
911                                 {
912                                         if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
913                                                 executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
914                                         else
915                                                 executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
916                                 }
917                                 else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
918                                         executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
919                                 break;
920
921                         case rr::GENERICVECTYPE_INT32:
922                                 // Write fragments
923                                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
924                                 {
925                                         if (m_sampleRegister[regSampleNdx].isAlive)
926                                         {
927                                                 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
928
929                                                 m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
930                                         }
931                                 }
932
933                                 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
934                                         executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
935                                 break;
936
937                         case rr::GENERICVECTYPE_UINT32:
938                                 // Write fragments
939                                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
940                                 {
941                                         if (m_sampleRegister[regSampleNdx].isAlive)
942                                         {
943                                                 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
944
945                                                 m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
946                                         }
947                                 }
948
949                                 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
950                                         executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
951                                 break;
952
953                         default:
954                                 DE_ASSERT(DE_FALSE);
955                 }
956         }
957 }
958
959 } // rr