1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
40 #define LINEAR_QUAD_LAYOUT 1
43 static INLINE vector float
44 spu_min(vector float a, vector float b)
46 vector unsigned int m;
47 m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
48 return spu_sel(a, b, m);
52 static INLINE vector float
53 spu_max(vector float a, vector float b)
55 vector unsigned int m;
56 m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
57 return spu_sel(b, a, m);
62 * Called by rasterizer for each quad after the shader has run. Do
63 * all the per-fragment operations including alpha test, z test,
64 * stencil test, blend, colormask and logicops. This is a
65 * fallback/debug function. In reality we'll use a generated function
 * produced by the PPU.  But this function is useful for
 * debugging and testing.
70 spu_fallback_fragment_ops(uint x, uint y,
72 tile_t *depthStencilTile,
78 vector unsigned int mask)
80 vector float frag_aos[4];
81 unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
82 unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
87 if (spu.depth_stencil_alpha.alpha.enabled) {
88 vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
89 vector unsigned int amask;
91 switch (spu.depth_stencil_alpha.alpha.func) {
93 amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
95 case PIPE_FUNC_GREATER:
96 amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
98 case PIPE_FUNC_GEQUAL:
99 amask = spu_cmpgt(ref, fragA);
100 amask = spu_nor(amask, amask);
102 case PIPE_FUNC_LEQUAL:
103 amask = spu_cmpgt(fragA, ref);
104 amask = spu_nor(amask, amask);
106 case PIPE_FUNC_EQUAL:
107 amask = spu_cmpeq(ref, fragA);
109 case PIPE_FUNC_NOTEQUAL:
110 amask = spu_cmpeq(ref, fragA);
111 amask = spu_nor(amask, amask);
113 case PIPE_FUNC_ALWAYS:
114 amask = spu_splats(0xffffffffU);
116 case PIPE_FUNC_NEVER:
117 amask = spu_splats( 0x0U);
123 mask = spu_and(mask, amask);
128 * Z and/or stencil testing...
130 if (spu.depth_stencil_alpha.depth.enabled ||
131 spu.depth_stencil_alpha.stencil[0].enabled) {
133 /* get four Z/Stencil values from tile */
134 vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
135 vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
136 vector unsigned int ifbZ = spu_and(ifbZS, mask24);
137 vector unsigned int ifbS = spu_andc(ifbZS, mask24);
139 if (spu.depth_stencil_alpha.stencil[0].enabled) {
140 /* do stencil test */
141 ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED);
144 else if (spu.depth_stencil_alpha.depth.enabled) {
147 ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
148 spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM);
150 vector unsigned int ifragZ;
151 vector unsigned int zmask;
153 /* convert four fragZ from float to uint */
154 fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
155 ifragZ = spu_convtu(fragZ, 0);
157 /* do depth comparison, setting zmask with results */
158 switch (spu.depth_stencil_alpha.depth.func) {
160 zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
162 case PIPE_FUNC_GREATER:
163 zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
165 case PIPE_FUNC_GEQUAL:
166 zmask = spu_cmpgt(ifbZ, ifragZ);
167 zmask = spu_nor(zmask, zmask);
169 case PIPE_FUNC_LEQUAL:
170 zmask = spu_cmpgt(ifragZ, ifbZ);
171 zmask = spu_nor(zmask, zmask);
173 case PIPE_FUNC_EQUAL:
174 zmask = spu_cmpeq(ifbZ, ifragZ);
176 case PIPE_FUNC_NOTEQUAL:
177 zmask = spu_cmpeq(ifbZ, ifragZ);
178 zmask = spu_nor(zmask, zmask);
180 case PIPE_FUNC_ALWAYS:
181 zmask = spu_splats(0xffffffffU);
183 case PIPE_FUNC_NEVER:
184 zmask = spu_splats( 0x0U);
190 mask = spu_and(mask, zmask);
192 /* merge framebuffer Z and fragment Z according to the mask */
193 ifbZ = spu_or(spu_and(ifragZ, mask),
194 spu_andc(ifbZ, mask));
197 if (spu_extract(spu_orx(mask), 0)) {
198 /* put new fragment Z/Stencil values back into Z/Stencil tile */
199 depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
201 spu.cur_ztile_status = TILE_STATUS_DIRTY;
207 * If we'll need the current framebuffer/tile colors for blending
208 * or logicop or colormask, fetch them now.
210 if (spu.blend.rt[0].blend_enable ||
211 spu.blend.logicop_enable ||
212 spu.blend.rt[0].colormask != 0xf) {
214 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
215 fbc0 = colorTile->ui[y][x*2+0];
216 fbc1 = colorTile->ui[y][x*2+1];
217 fbc2 = colorTile->ui[y][x*2+2];
218 fbc3 = colorTile->ui[y][x*2+3];
220 fbc0 = colorTile->ui[y+0][x+0];
221 fbc1 = colorTile->ui[y+0][x+1];
222 fbc2 = colorTile->ui[y+1][x+0];
223 fbc3 = colorTile->ui[y+1][x+1];
231 if (spu.blend.rt[0].blend_enable) {
232 /* blending terms, misc regs */
233 vector float term1r, term1g, term1b, term1a;
234 vector float term2r, term2g, term2b, term2a;
235 vector float one, tmp;
237 vector float fbRGBA[4]; /* current framebuffer colors */
239 /* convert framebuffer colors from packed int to vector float */
241 vector float temp[4]; /* float colors in AOS form */
242 switch (spu.fb.color_format) {
243 case PIPE_FORMAT_A8R8G8B8_UNORM:
244 temp[0] = spu_unpack_B8G8R8A8(fbc0);
245 temp[1] = spu_unpack_B8G8R8A8(fbc1);
246 temp[2] = spu_unpack_B8G8R8A8(fbc2);
247 temp[3] = spu_unpack_B8G8R8A8(fbc3);
249 case PIPE_FORMAT_B8G8R8A8_UNORM:
250 temp[0] = spu_unpack_A8R8G8B8(fbc0);
251 temp[1] = spu_unpack_A8R8G8B8(fbc1);
252 temp[2] = spu_unpack_A8R8G8B8(fbc2);
253 temp[3] = spu_unpack_A8R8G8B8(fbc3);
258 _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
262 * Compute Src RGB terms (fragment color * factor)
264 switch (spu.blend.rt[0].rgb_src_factor) {
265 case PIPE_BLENDFACTOR_ONE:
270 case PIPE_BLENDFACTOR_ZERO:
273 term1b = spu_splats(0.0f);
275 case PIPE_BLENDFACTOR_SRC_COLOR:
276 term1r = spu_mul(fragR, fragR);
277 term1g = spu_mul(fragG, fragG);
278 term1b = spu_mul(fragB, fragB);
280 case PIPE_BLENDFACTOR_SRC_ALPHA:
281 term1r = spu_mul(fragR, fragA);
282 term1g = spu_mul(fragG, fragA);
283 term1b = spu_mul(fragB, fragA);
285 case PIPE_BLENDFACTOR_DST_COLOR:
286 term1r = spu_mul(fragR, fbRGBA[0]);
287 term1g = spu_mul(fragG, fbRGBA[1]);
288 term1b = spu_mul(fragB, fbRGBA[1]);
290 case PIPE_BLENDFACTOR_DST_ALPHA:
291 term1r = spu_mul(fragR, fbRGBA[3]);
292 term1g = spu_mul(fragG, fbRGBA[3]);
293 term1b = spu_mul(fragB, fbRGBA[3]);
295 case PIPE_BLENDFACTOR_CONST_COLOR:
296 term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
297 term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
298 term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
300 case PIPE_BLENDFACTOR_CONST_ALPHA:
301 term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
302 term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
303 term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
311 * Compute Src Alpha term (fragment alpha * factor)
313 switch (spu.blend.rt[0].alpha_src_factor) {
314 case PIPE_BLENDFACTOR_ONE:
317 case PIPE_BLENDFACTOR_SRC_COLOR:
318 term1a = spu_splats(0.0f);
320 case PIPE_BLENDFACTOR_SRC_ALPHA:
321 term1a = spu_mul(fragA, fragA);
323 case PIPE_BLENDFACTOR_DST_COLOR:
325 case PIPE_BLENDFACTOR_DST_ALPHA:
326 term1a = spu_mul(fragA, fbRGBA[3]);
328 case PIPE_BLENDFACTOR_CONST_COLOR:
330 case PIPE_BLENDFACTOR_CONST_ALPHA:
331 term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
339 * Compute Dest RGB terms (framebuffer color * factor)
341 switch (spu.blend.rt[0].rgb_dst_factor) {
342 case PIPE_BLENDFACTOR_ONE:
347 case PIPE_BLENDFACTOR_ZERO:
350 term2b = spu_splats(0.0f);
352 case PIPE_BLENDFACTOR_SRC_COLOR:
353 term2r = spu_mul(fbRGBA[0], fragR);
354 term2g = spu_mul(fbRGBA[1], fragG);
355 term2b = spu_mul(fbRGBA[2], fragB);
357 case PIPE_BLENDFACTOR_SRC_ALPHA:
358 term2r = spu_mul(fbRGBA[0], fragA);
359 term2g = spu_mul(fbRGBA[1], fragA);
360 term2b = spu_mul(fbRGBA[2], fragA);
362 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
363 one = spu_splats(1.0f);
364 tmp = spu_sub(one, fragA);
365 term2r = spu_mul(fbRGBA[0], tmp);
366 term2g = spu_mul(fbRGBA[1], tmp);
367 term2b = spu_mul(fbRGBA[2], tmp);
369 case PIPE_BLENDFACTOR_DST_COLOR:
370 term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
371 term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
372 term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
374 case PIPE_BLENDFACTOR_DST_ALPHA:
375 term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
376 term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
377 term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
379 case PIPE_BLENDFACTOR_CONST_COLOR:
380 term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
381 term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
382 term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
384 case PIPE_BLENDFACTOR_CONST_ALPHA:
385 term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
386 term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
387 term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
395 * Compute Dest Alpha term (framebuffer alpha * factor)
397 switch (spu.blend.rt[0].alpha_dst_factor) {
398 case PIPE_BLENDFACTOR_ONE:
401 case PIPE_BLENDFACTOR_SRC_COLOR:
402 term2a = spu_splats(0.0f);
404 case PIPE_BLENDFACTOR_SRC_ALPHA:
405 term2a = spu_mul(fbRGBA[3], fragA);
407 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
408 one = spu_splats(1.0f);
409 tmp = spu_sub(one, fragA);
410 term2a = spu_mul(fbRGBA[3], tmp);
412 case PIPE_BLENDFACTOR_DST_COLOR:
414 case PIPE_BLENDFACTOR_DST_ALPHA:
415 term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
417 case PIPE_BLENDFACTOR_CONST_COLOR:
419 case PIPE_BLENDFACTOR_CONST_ALPHA:
420 term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
428 * Combine Src/Dest RGB terms
430 switch (spu.blend.rt[0].rgb_func) {
432 fragR = spu_add(term1r, term2r);
433 fragG = spu_add(term1g, term2g);
434 fragB = spu_add(term1b, term2b);
436 case PIPE_BLEND_SUBTRACT:
437 fragR = spu_sub(term1r, term2r);
438 fragG = spu_sub(term1g, term2g);
439 fragB = spu_sub(term1b, term2b);
441 case PIPE_BLEND_REVERSE_SUBTRACT:
442 fragR = spu_sub(term2r, term1r);
443 fragG = spu_sub(term2g, term1g);
444 fragB = spu_sub(term2b, term1b);
447 fragR = spu_min(term1r, term2r);
448 fragG = spu_min(term1g, term2g);
449 fragB = spu_min(term1b, term2b);
452 fragR = spu_max(term1r, term2r);
453 fragG = spu_max(term1g, term2g);
454 fragB = spu_max(term1b, term2b);
461 * Combine Src/Dest A term
463 switch (spu.blend.rt[0].alpha_func) {
465 fragA = spu_add(term1a, term2a);
467 case PIPE_BLEND_SUBTRACT:
468 fragA = spu_sub(term1a, term2a);
470 case PIPE_BLEND_REVERSE_SUBTRACT:
471 fragA = spu_sub(term2a, term1a);
474 fragA = spu_min(term1a, term2a);
477 fragA = spu_max(term1a, term2a);
486 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
491 vector float frag_soa[4];
496 _transpose_matrix4x4(frag_aos, frag_soa);
499 /* short-cut relying on function parameter layout: */
500 _transpose_matrix4x4(frag_aos, &fragR);
506 * Pack fragment float colors into 32-bit RGBA words.
508 switch (spu.fb.color_format) {
509 case PIPE_FORMAT_B8G8R8A8_UNORM:
510 fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
511 fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
512 fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
513 fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
515 case PIPE_FORMAT_A8R8G8B8_UNORM:
516 fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
517 fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
518 fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
519 fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
522 fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
530 if (spu.blend.rt[0].colormask != 0xf) {
531 uint cmask = 0x0; /* each byte corresponds to a color channel */
533 /* Form bitmask depending on color buffer format and colormask bits */
534 switch (spu.fb.color_format) {
535 case PIPE_FORMAT_B8G8R8A8_UNORM:
536 if (spu.blend.rt[0].colormask & PIPE_MASK_R)
537 cmask |= 0x00ff0000; /* red */
538 if (spu.blend.rt[0].colormask & PIPE_MASK_G)
539 cmask |= 0x0000ff00; /* green */
540 if (spu.blend.rt[0].colormask & PIPE_MASK_B)
541 cmask |= 0x000000ff; /* blue */
542 if (spu.blend.rt[0].colormask & PIPE_MASK_A)
543 cmask |= 0xff000000; /* alpha */
545 case PIPE_FORMAT_A8R8G8B8_UNORM:
546 if (spu.blend.rt[0].colormask & PIPE_MASK_R)
547 cmask |= 0x0000ff00; /* red */
548 if (spu.blend.rt[0].colormask & PIPE_MASK_G)
549 cmask |= 0x00ff0000; /* green */
550 if (spu.blend.rt[0].colormask & PIPE_MASK_B)
551 cmask |= 0xff000000; /* blue */
552 if (spu.blend.rt[0].colormask & PIPE_MASK_A)
553 cmask |= 0x000000ff; /* alpha */
560 * Apply color mask to the 32-bit packed colors.
562 * frag color[i] = frag color[i];
564 * frag color[i] = framebuffer color[i];
566 fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
567 fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
568 fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
569 fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
576 if (spu.blend.logicop_enable) {
578 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
583 * If mask is non-zero, mark tile as dirty.
585 if (spu_extract(spu_orx(mask), 0)) {
586 spu.cur_ctile_status = TILE_STATUS_DIRTY;
589 /* write no fragments */
595 * Write new fragment/quad colors to the framebuffer/tile.
596 * Only write pixels where the corresponding mask word is set.
598 #if LINEAR_QUAD_LAYOUT
605 if (spu_extract(mask, 0))
606 colorTile->ui[y][x*2] = fragc0;
607 if (spu_extract(mask, 1))
608 colorTile->ui[y][x*2+1] = fragc1;
609 if (spu_extract(mask, 2))
610 colorTile->ui[y][x*2+2] = fragc2;
611 if (spu_extract(mask, 3))
612 colorTile->ui[y][x*2+3] = fragc3;
622 if (spu_extract(mask, 0))
623 colorTile->ui[y+0][x+0] = fragc0;
624 if (spu_extract(mask, 1))
625 colorTile->ui[y+0][x+1] = fragc1;
626 if (spu_extract(mask, 2))
627 colorTile->ui[y+1][x+0] = fragc2;
628 if (spu_extract(mask, 3))
629 colorTile->ui[y+1][x+1] = fragc3;