nvfx: implement LIT in fp

author Luca Barbieri <luca@luca-barbieri.com>

Fri, 3 Sep 2010 20:06:41 +0000 (22:06 +0200)

committer Luca Barbieri <luca@luca-barbieri.com>

Fri, 3 Sep 2010 20:37:35 +0000 (22:37 +0200)
author Luca Barbieri <luca@luca-barbieri.com>
Fri, 3 Sep 2010 20:06:41 +0000 (22:06 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Fri, 3 Sep 2010 20:37:35 +0000 (22:37 +0200)
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c

index 275672a..6eb744e 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1,3 +1,4 @@
+#include <float.h>
  #include "pipe/p_context.h"
  #include "pipe/p_defines.h"
  #include "pipe/p_state.h"
@@ -629,7 +630,28 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
         case TGSI_OPCODE_LG2:
                 nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
                 break;
-//     case TGSI_OPCODE_LIT:
+       case TGSI_OPCODE_LIT:
+               if(!nvfx->is_nv4x)
+                       nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], src[1], src[2]));
+               else {
+                       /* Clamp with FLT_MIN so that log2 never gives -infinity; multiplying by a specular
+                        * value of 0 then always gives 0, so ex2 gives 1, satisfying the 0^0 = 1 requirement.
+                        *
+                        * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead.
+                        */
+                       float maxv[4] = {0, FLT_MIN, 0, 0};
+                       struct nvfx_src maxs = nvfx_src(constant(fpc, -1, maxv));
+                       tmp = nvfx_src(temp(fpc));
+                       if (ci>= 0 || ii >= 0) {
+                               nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
+                               maxs = tmp;
+                       }
+                       nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
+                       nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
+                       nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
+                       nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
+               }
+               break;
         case TGSI_OPCODE_LRP:
                 if(!nvfx->is_nv4x)
                         nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h

index e642a27..badc63d 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_shader.h
+++ b/src/gallium/drivers/nvfx/nvfx_shader.h
@@ -263,6 +263,7 @@
  
  /* NV40 only fragment program opcodes */
  #define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F
+#define NVFX_FP_OP_OPCODE_LITEX2_NV40 0x3C
  
  /* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
  #define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0
author	Luca Barbieri <luca@luca-barbieri.com>
	Fri, 3 Sep 2010 20:06:41 +0000 (22:06 +0200)
committer	Luca Barbieri <luca@luca-barbieri.com>
	Fri, 3 Sep 2010 20:37:35 +0000 (22:37 +0200)
src/gallium/drivers/nvfx/nvfx_fragprog.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_shader.h		patch \| blob \| history