add complete support for ATI_fragment_shader for the r200. Most of the new code is...
author Roland Scheidegger <rscheidegger@gmx.ch>
Mon, 12 Sep 2005 21:20:10 +0000 (21:20 +0000)
committer Roland Scheidegger <rscheidegger@gmx.ch>
Mon, 12 Sep 2005 21:20:10 +0000 (21:20 +0000)
src/mesa/drivers/dri/r200/Makefile
src/mesa/drivers/dri/r200/r200_cmdbuf.c
src/mesa/drivers/dri/r200/r200_context.c
src/mesa/drivers/dri/r200/r200_context.h
src/mesa/drivers/dri/r200/r200_fragshader.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_screen.c
src/mesa/drivers/dri/r200/r200_screen.h
src/mesa/drivers/dri/r200/r200_state.c
src/mesa/drivers/dri/r200/r200_state_init.c
src/mesa/drivers/dri/r200/r200_tex.h
src/mesa/drivers/dri/r200/r200_texstate.c

index e4fff57..dfeebca 100644 (file)
@@ -31,6 +31,7 @@ DRIVER_SOURCES = r200_context.c \
                 r200_vtxfmt_c.c \
                 r200_vtxfmt_sse.c \
                 r200_vtxfmt_x86.c \
+                r200_fragshader.c \
                 $(EGL_SOURCES)
 
 C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
index 2891054..759175a 100644 (file)
@@ -88,13 +88,15 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
    insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
    for (i = 0; i < mtu; ++i)
        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
    for (i = 0; i < mtu; ++i)
        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
    for (i = 0; i < 6; ++i)
        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
-
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
    for (i = 0; i < 8; ++i)
        insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
    for (i = 0; i < 3 + mtu; ++i)
index cbf4a42..2c95e9a 100644 (file)
@@ -67,6 +67,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define need_GL_ARB_texture_compression
 #define need_GL_ARB_vertex_buffer_object
 #define need_GL_ARB_vertex_program
+#define need_GL_ATI_fragment_shader
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
@@ -177,6 +178,10 @@ const struct dri_extension NV_vp_extension[] = {
     { "GL_NV_vertex_program",              GL_NV_vertex_program_functions }
 };
 
+/* Single-entry extension table for GL_ATI_fragment_shader.  It is kept
+ * separate from the other extension lists so it can be handed to
+ * driInitSingleExtension() on its own. */
+const struct dri_extension ATI_fs_extension[] = {
+    { "GL_ATI_fragment_shader",            GL_ATI_fragment_shader_functions }
+};
+
 extern const struct tnl_pipeline_stage _r200_render_stage;
 extern const struct tnl_pipeline_stage _r200_tcl_stage;
 
@@ -466,6 +471,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
    if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
       driInitSingleExtension( ctx, NV_vp_extension );
 
+   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
+      driInitSingleExtension( ctx, ATI_fs_extension );
 #if 0
    r200InitDriverFuncs( ctx );
    r200InitIoctlFuncs( ctx );
index d2902f9..aacc132 100644 (file)
@@ -278,12 +278,17 @@ struct r200_state_atom {
 #define TEX_PP_TXSIZE               4  /*2c0c*/
 #define TEX_PP_TXPITCH              5  /*2c10*/
 #define TEX_PP_BORDER_COLOR         6  /*2c14*/
-#define TEX_CMD_1                   7
-#define TEX_PP_TXOFFSET             8  /*2d00 */
-#define TEX_STATE_SIZE              9
-
-#define CUBE_CMD_0                  0  /* 1 register follows */
-#define CUBE_PP_CUBIC_FACES         1  /* 0x2c18 */
+#define TEX_CMD_1_OLDDRM            7
+#define TEX_PP_TXOFFSET_OLDDRM      8  /*2d00 */
+#define TEX_STATE_SIZE_OLDDRM       9
+#define TEX_PP_CUBIC_FACES          7
+#define TEX_PP_TXMULTI_CTL          8
+#define TEX_CMD_1_NEWDRM            9
+#define TEX_PP_TXOFFSET_NEWDRM     10
+#define TEX_STATE_SIZE_NEWDRM      11
+
+#define CUBE_CMD_0                  0  /* 1 register follows */ /* this command unnecessary */
+#define CUBE_PP_CUBIC_FACES         1  /* 0x2c18 */             /* with new enough drm */
 #define CUBE_CMD_1                  2  /* 5 registers follow */
 #define CUBE_PP_CUBIC_OFFSET_F1     3  /* 0x2d04 */
 #define CUBE_PP_CUBIC_OFFSET_F2     4  /* 0x2d08 */
@@ -308,6 +313,25 @@ struct r200_state_atom {
 #define TF_TFACTOR_5                6
 #define TF_STATE_SIZE               7
 
+/* Indices into the "atf" atom's cmd[] array: one command dword followed by
+ * eight texture-factor slots (filled from the shader constants by
+ * r200UpdateFSConstants). */
+#define ATF_CMD_0                   0
+#define ATF_TFACTOR_0               1
+#define ATF_TFACTOR_1               2
+#define ATF_TFACTOR_2               3
+#define ATF_TFACTOR_3               4
+#define ATF_TFACTOR_4               5
+#define ATF_TFACTOR_5               6
+#define ATF_TFACTOR_6               7
+#define ATF_TFACTOR_7               8
+#define ATF_STATE_SIZE              9
+
+/* ATI_FRAGMENT_SHADER */
+/* Indices into an "afs" atom's cmd[] array: a command dword, then four
+ * dwords per arithmetic instruction (IC0/IC1 and IA0/IA1 -- presumably the
+ * color and alpha instruction words; TODO confirm against the register
+ * docs).  Only the first instruction's dwords are named here; the size is
+ * consistent with 1 command dword + 8 instructions * 4 dwords = 33. */
+#define AFS_CMD_0                 0
+#define AFS_IC0                   1 /* 2f00 */
+#define AFS_IC1                   2 /* 2f04 */
+#define AFS_IA0                   3 /* 2f08 */
+#define AFS_IA1                   4 /* 2f0c */
+#define AFS_STATE_SIZE           33
+
 #define TCL_CMD_0                 0
 #define TCL_LIGHT_MODEL_CTL_0     1
 #define TCL_LIGHT_MODEL_CTL_1     2
@@ -533,6 +557,8 @@ struct r200_hw_state {
    struct r200_state_atom fog;
    struct r200_state_atom glt;
    struct r200_state_atom prf;
+   struct r200_state_atom afs[2];
+   struct r200_state_atom atf;
 
    int max_state_size; /* Number of bytes necessary for a full state emit. */
    GLboolean is_dirty, all_dirty;
@@ -942,6 +968,8 @@ struct r200_context {
 
    GLboolean using_hyperz;
    GLboolean texmicrotile;
+
+  struct ati_fragment_shader *afs_loaded;
 };
 
 #define R200_CONTEXT(ctx)              ((r200ContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
new file mode 100644 (file)
index 0000000..70b7526
--- /dev/null
@@ -0,0 +1,543 @@
+/**************************************************************************
+ *
+ * Copyright 2004 David Airlie
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "tnl/t_context.h"
+#include "atifragshader.h"
+#include "program.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+
+/* Index helpers into an AFS atom's cmd[] array.  Both expand to an lvalue and
+ * expect a local `afs_cmd` pointer to be in scope at the point of use.
+ * Each instruction occupies four dwords after the leading command dword:
+ * `inst` selects the instruction, `type` selects the first (0) or second (1)
+ * pair of dwords within it.  SET_INST is the first dword of that pair,
+ * SET_INST_2 the second.
+ * The parameters are parenthesized so that expression arguments keep their
+ * own grouping instead of being re-associated by operator precedence. */
+#define SET_INST(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 1)]
+#define SET_INST_2(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 2)]
+
+/* Encode one source operand of an arithmetic instruction.
+ *
+ * afs_cmd  instruction dwords being built
+ * opnum    instruction number within the pass
+ * optype   0 or 1, selects which dword pair of the instruction is written
+ * srcReg   source register description (Index, argMod, argRep)
+ * argPos   operand position 0..2 (hardware args A, B, C)
+ * tfactor  in/out: first GL_CON_x_ATI index used so far (0 if none); a
+ *          different constant is routed through the second tfactor selector
+ *
+ * The resulting bits are OR'ed into the two dwords of the selected pair, so
+ * the caller must have cleared them beforehand.
+ */
+static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype,
+                               const struct atifragshader_src_register srcReg,
+                               GLuint argPos, GLuint *tfactor )
+{
+   const GLuint src = srcReg.Index;
+   const GLuint mods = srcReg.argMod;
+   /* source selectors are 5 bits apart, modifier bits 4, replicate bits 2 */
+   const GLuint selShift = 5 * argPos;
+   const GLuint modShift = 4 * argPos;
+   const GLuint replShift = R200_TXC_REPL_ARG_A_SHIFT + (2 * argPos);
+   GLuint *words = &afs_cmd[(opnum << 2) + (optype << 1) + 1];
+   GLuint selBits = 0;
+   GLuint ctlBits = 0;
+   /* 1 selects the odd-numbered source encoding (base selector + 1);
+      presumably the alpha flavour of the source - TODO confirm */
+   GLuint odd = 0;
+
+   /* component replication */
+   switch (srcReg.argRep) {
+   case GL_RED:
+      ctlBits |= R200_TXC_REPL_RED << replShift;
+      odd = (optype != 0);
+      break;
+   case GL_GREEN:
+      ctlBits |= R200_TXC_REPL_GREEN << replShift;
+      odd = (optype != 0);
+      break;
+   case GL_BLUE:
+      if (optype)
+         odd = 1;
+      else
+         ctlBits |= R200_TXC_REPL_BLUE << replShift;
+      break;
+   case GL_ALPHA:
+      odd = (optype == 0);
+      break;
+   default:
+      break;
+   }
+
+   /* source selection */
+   if (src >= GL_REG_0_ATI && src <= GL_REG_5_ATI) {
+      selBits |= (((src - GL_REG_0_ATI)*2) + 10 + odd) << selShift;
+   }
+   else if (src >= GL_CON_0_ATI && src <= GL_CON_7_ATI) {
+      const GLuint con = src - GL_CON_0_ATI;
+
+      if ((*tfactor == 0) || (src == *tfactor)) {
+         /* first (or repeated) constant: use the primary tfactor selector */
+         selBits |= (R200_TXC_ARG_A_TFACTOR_COLOR + odd) << selShift;
+         ctlBits |= con << R200_TXC_TFACTOR_SEL_SHIFT;
+         *tfactor = src;
+      }
+      else {
+         /* a second, different constant goes through tfactor1 */
+         selBits |= (R200_TXC_ARG_A_TFACTOR1_COLOR + odd) << selShift;
+         ctlBits |= con << R200_TXC_TFACTOR1_SEL_SHIFT;
+      }
+   }
+   else if (src == GL_PRIMARY_COLOR_EXT) {
+      selBits |= (R200_TXC_ARG_A_DIFFUSE_COLOR + odd) << selShift;
+   }
+   else if (src == GL_SECONDARY_INTERPOLATOR_ATI) {
+      selBits |= (R200_TXC_ARG_A_SPECULAR_COLOR + odd) << selShift;
+   }
+   else if (src == GL_ONE) {
+      /* GL_ZERO needs nothing, GL_ONE is expressed as the complement of zero */
+      selBits |= R200_TXC_COMP_ARG_A << modShift;
+   }
+
+   /* argument modifiers; complement and negate toggle, bias and 2x set */
+   if (mods & GL_COMP_BIT_ATI)
+      selBits ^= R200_TXC_COMP_ARG_A << modShift;
+   if (mods & GL_BIAS_BIT_ATI)
+      selBits |= R200_TXC_BIAS_ARG_A << modShift;
+   if (mods & GL_2X_BIT_ATI)
+      selBits |= R200_TXC_SCALE_ARG_A << modShift;
+   if (mods & GL_NEGATE_BIT_ATI)
+      selBits ^= R200_TXC_NEG_ARG_A << modShift;
+
+   words[0] |= selBits;
+   words[1] |= ctlBits;
+}
+
+/* Translates a destination register write mask (the dstMask value of an
+ * instruction's DstReg, used directly as the index) into the matching
+ * R200_TXC_OUTPUT_MASK_* bits.  Bit 0 of the index stands for R, bit 1 for
+ * G and bit 2 for B; an index of 0 is treated like 7 (write everything).
+ * The table is never written, hence const. */
+static const GLuint dstmask_table[8] =
+{
+   R200_TXC_OUTPUT_MASK_RGB,
+   R200_TXC_OUTPUT_MASK_R,
+   R200_TXC_OUTPUT_MASK_G,
+   R200_TXC_OUTPUT_MASK_RG,
+   R200_TXC_OUTPUT_MASK_B,
+   R200_TXC_OUTPUT_MASK_RB,
+   R200_TXC_OUTPUT_MASK_GB,
+   R200_TXC_OUTPUT_MASK_RGB
+};
+
+/* Translate the arithmetic instructions of the current ATI fragment shader
+ * into hardware instruction dwords stored in the afs[0]/afs[1] atoms.
+ *
+ * Both atoms are marked dirty.  A single pass shader is written to afs[1];
+ * a two pass shader has its first pass written to afs[0] and its second
+ * pass to afs[1].  For every instruction the four dwords are cleared and
+ * then filled for optype 0 and optype 1 in turn (optype 1 is the alpha op,
+ * see the dot product cases).  On return rmesa->afs_loaded records the
+ * shader that was translated.
+ */
+static void r200UpdateFSArith( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint *afs_cmd;
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint pass;
+
+   R200_STATECHANGE( rmesa, afs[0] );
+   R200_STATECHANGE( rmesa, afs[1] );
+
+   /* pick the atom receiving the first (or only) pass */
+   if (shader->NumPasses < 2) {
+      afs_cmd = rmesa->hw.afs[1].cmd;
+   }
+   else {
+      afs_cmd = rmesa->hw.afs[0].cmd;
+   }
+   for (pass = 0; pass < shader->NumPasses; pass++) {
+      /* opnum is the instruction slot; it advances in step with pc */
+      GLuint opnum = 0;
+      GLuint pc;
+      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
+         GLuint optype;
+        struct atifs_instruction *inst = &shader->Instructions[pass][pc];
+
+        /* start from a clean slot, everything below only ORs bits in */
+        SET_INST(opnum, 0) = 0;
+        SET_INST_2(opnum, 0) = 0;
+        SET_INST(opnum, 1) = 0;
+        SET_INST_2(opnum, 1) = 0;
+
+        for (optype = 0; optype < 2; optype++) {
+           /* first constant referenced by this op, tracked by
+              r200SetFragShaderArg to choose between the two tfactor selectors */
+           GLuint tfactor = 0;
+
+           if (inst->Opcode[optype]) {
+              switch (inst->Opcode[optype]) {
+              /* these are all MADD in disguise
+                 MADD is A * B + C
+                 so for GL_ADD use arg B/C and make A complement 0
+                 for GL_SUB use arg B/C, negate C and make A complement 0
+                 for GL_MOV use arg C
+                 for GL_MUL use arg A
+                 for GL_MAD all good */
+              case GL_SUB_ATI:
+                 /* negate C */
+                 SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C;
+                 /* fallthrough */
+              case GL_ADD_ATI:
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 1, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][1], 2, &tfactor);
+                 /* A = complement 0 */
+                 SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A;
+                 SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+                 break;
+              case GL_MOV_ATI:
+                 /* put arg0 in C */
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 2, &tfactor);
+                 SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+                 break;
+              case GL_MAD_ATI:
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][2], 2, &tfactor);
+                 /* fallthrough */
+              case GL_MUL_ATI:
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 0, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][1], 1, &tfactor);
+                 SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+                 break;
+              case GL_LERP_ATI:
+                 /* arg order is not native chip order, swap A and C */
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 2, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][1], 1, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][2], 0, &tfactor);
+                 SET_INST(opnum, optype) |= R200_TXC_OP_LERP;
+                 break;
+              case GL_CND_ATI:
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 0, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][1], 1, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][2], 2, &tfactor);
+                 SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL;
+                 break;
+              case GL_CND0_ATI:
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][0], 0, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][1], 1, &tfactor);
+                 r200SetFragShaderArg(afs_cmd, opnum, optype,
+                                       inst->SrcReg[optype][2], 2, &tfactor);
+                 SET_INST(opnum, optype) |= R200_TXC_OP_CND0;
+                 break;
+                 /* cannot specify dot ops as alpha ops directly */
+              case GL_DOT2_ADD_ATI:
+                 if (optype)
+                    SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+                 else {
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][0], 0, &tfactor);
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][1], 1, &tfactor);
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][2], 2, &tfactor);
+                    SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD;
+                 }
+                 break;
+              case GL_DOT3_ATI:
+                 if (optype)
+                    SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+                 else {
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][0], 0, &tfactor);
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][1], 1, &tfactor);
+                    SET_INST(opnum, 0) |= R200_TXC_OP_DOT3;
+                 }
+                 break;
+              case GL_DOT4_ATI:
+              /* experimental verification: for dot4 setup of alpha args is needed
+                 (dstmod is ignored, though, so dot2/dot3 should be safe)
+                 the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4
+                 but the API doesn't allow it */
+                 if (optype)
+                    SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+                 else {
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][0], 0, &tfactor);
+                    r200SetFragShaderArg(afs_cmd, opnum, 0,
+                                       inst->SrcReg[0][1], 1, &tfactor);
+                    /* same two sources again, this time for the alpha args */
+                    r200SetFragShaderArg(afs_cmd, opnum, 1,
+                                       inst->SrcReg[0][0], 0, &tfactor);
+                    r200SetFragShaderArg(afs_cmd, opnum, 1,
+                                       inst->SrcReg[0][1], 1, &tfactor);
+                    SET_INST(opnum, optype) |= R200_TXC_OP_DOT4;
+                 }
+                 break;
+              }
+           }
+
+           /* destination */
+           if (inst->DstReg[optype].Index) {
+              GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
+              GLuint dstmask = inst->DstReg[optype].dstMask;
+              GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI;
+              GLuint dstmod = inst->DstReg[optype].dstMod;
+
+              /* saturate is handled via the clamp mode, what remains is the scale */
+              dstmod &= ~GL_SATURATE_BIT_ATI;
+
+              SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT;
+              SET_INST_2(opnum, optype) |= dstmask_table[dstmask];
+
+               /* fglrx does clamp the last instructions to 0_1 it seems */
+               /* this won't necessarily catch the last instruction
+                  which writes to reg0 */
+              if (sat || (pc == (shader->numArithInstr[pass] - 1) &&
+                       ((pass == 1) || (shader->NumPasses == 1))))
+                 SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1;
+              else
+               /*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */
+                 SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8;
+/*               SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/
+              switch(dstmod) {
+              case GL_2X_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X;
+                 break;
+              case GL_4X_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X;
+                 break;
+              case GL_8X_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X;
+                 break;
+              case GL_HALF_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2;
+                 break;
+              case GL_QUARTER_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4;
+                 break;
+              case GL_EIGHTH_BIT_ATI:
+                 SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8;
+                 break;
+              default:
+                 break;
+              }
+           }
+        }
+/*      fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
+               pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0),
+               SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/
+         opnum++;
+      }
+      /* a second pass (if there is one) always goes to afs[1] */
+      afs_cmd = rmesa->hw.afs[1].cmd;
+   }
+   /* remember which shader is encoded so it is not translated again */
+   rmesa->afs_loaded = ctx->ATIFragmentShader.Current;
+}
+
+/* Set up texture coordinate routing and the texture / blend stage enable
+ * bits for the current ATI fragment shader.
+ *
+ * Marks the ctx and cst atoms and all tex[] atoms dirty, then:
+ *  - selects r or q as the extra coordinate for every unit (swizzlerq),
+ *  - clears and recomputes the enable bits in PP_CNTL / PP_CNTL_X from the
+ *    number of arithmetic instructions per pass,
+ *  - programs TXFORMAT / TXFORMAT_X / TXMULTI_CTL of every unit that has a
+ *    setup instruction (sample or pass) in the shader.
+ *
+ * The texture object bound to a unit is only consulted for sample
+ * instructions, to tell 3D and cube map lookups apart from 2D ones.
+ * NOTE(review): ctx->Texture.Unit[reg]._Current is presumably NULL for a
+ * unit without an enabled texture target, and a shader may still sample
+ * such a unit, so the pointer is checked before use; in that case the
+ * coordinate mode is derived from the swizzle alone.
+ */
+static void r200UpdateFSRouting( GLcontext *ctx ) {
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint reg;
+
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, cst );
+
+   for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+      if (shader->swizzlerq & (1 << (2 * reg))) {
+         /* r coord */
+         set_re_cntl_d3d( ctx, reg, 1);
+      }
+      else {
+         /* q coord */
+         set_re_cntl_d3d( ctx, reg, 0);
+      }
+   }
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE |
+                                      R200_TEX_BLEND_ENABLE_MASK |
+                                      R200_TEX_ENABLE_MASK);
+   rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK |
+                                        R200_PPX_TEX_ENABLE_MASK |
+                                        R200_PPX_OUTPUT_REG_MASK);
+
+   /* first pass registers use slots 8 - 15
+      but single pass shaders use slots 0 - 7 */
+   if (shader->NumPasses < 2) {
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ?
+         0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+         (0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+   } else {
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ?
+         0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+         (0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+      rmesa->hw.cst.cmd[CST_PP_CNTL_X] |=
+         (0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT;
+   }
+
+   if (shader->NumPasses < 2) {
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         R200_STATECHANGE( rmesa, tex[reg] );
+         rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         if (shader->SetupInst[0][reg].Opcode) {
+            GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+               & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+            GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+            txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+               << R200_TXFORMAT_ST_ROUTE_SHIFT;
+            /* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when
+               using projection so don't have to worry there).
+               When passing coords, need R200_TEXCOORD_VOLUME, otherwise lose a coord */
+            /* FIXME: someone might rely on default tex coords r/q, which we unfortunately
+               don't provide (we have the same problem without shaders) */
+            if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+               if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                  shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_x |= R200_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_x |= R200_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_3D) {
+               txformat_x |= R200_TEXCOORD_VOLUME;
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+               shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_x |= R200_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_x |= R200_TEXCOORD_PROJ;
+            }
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+            /* is this a good idea? Could potentially sample from not enabled unit.
+               results are probably undefined anyway (?) but I hope it doesn't lock up... */
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+         }
+      }
+
+   } else {
+      /* setup 1st pass */
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         GLuint txformat_multi = 0;
+         R200_STATECHANGE( rmesa, tex[reg] );
+         if (shader->SetupInst[0][reg].Opcode) {
+            txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+               << R200_PASS1_ST_ROUTE_SHIFT;
+            if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+               if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                  shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_3D) {
+               txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+               shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+            }
+            rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
+         }
+         /* written for every unit so stale first pass routing is cleared */
+         rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+      }
+
+      /* setup 2nd pass */
+      for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         if (shader->SetupInst[1][reg].Opcode) {
+            GLuint coord = shader->SetupInst[1][reg].src;
+            GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+               & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+            GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+            R200_STATECHANGE( rmesa, tex[reg] );
+            if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+               /* coordinate mode is chosen from the swizzle only, exactly as
+                  in the single pass and first pass cases */
+               if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                  shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_x |= R200_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_x |= R200_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_3D) {
+               txformat_x |= R200_TEXCOORD_VOLUME;
+            }
+            else if (texObj && texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+               shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_x |= R200_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_x |= R200_TEXCOORD_PROJ;
+            }
+            if (coord >= GL_REG_0_ATI) {
+               /* coordinates come from a register written by the first pass */
+               GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL];
+               txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT;
+               rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+               rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 <<
+                  (R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI);
+            } else {
+               /* coordinates come from a regular texture coordinate set */
+               txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT;
+            }
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+         }
+      }
+   }
+}
+
+/* Upload the eight shader constants into the atf (texture factor) atom.
+ * For each constant the shader-local value is used when the matching bit in
+ * localConstDef is set, otherwise the context-wide global value.  Components
+ * are converted to unsigned bytes and packed with r200PackColor().
+ */
+static void r200UpdateFSConstants( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint slot;
+
+   R200_STATECHANGE(rmesa, atf);
+
+   for (slot = 0; slot < 8; slot++) {
+      const GLfloat *value;
+      GLubyte rgba[4];
+      GLuint chan;
+
+      /* local definition wins over the global constant */
+      if ((shader->localConstDef >> slot) & 1)
+         value = shader->Constants[slot];
+      else
+         value = ctx->ATIFragmentShader.globalConstants[slot];
+
+      for (chan = 0; chan < 4; chan++) {
+         CLAMPED_FLOAT_TO_UBYTE(rgba[chan], value[chan]);
+      }
+
+      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + slot] =
+         r200PackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+   }
+}
+
+/* Bring the hardware state in line with the current ATI fragment shader.
+ *
+ * Constants are uploaded on every call: global constants can change and
+ * there is no separate notification for that.
+ * Routing is recomputed on every call as well: non-shader code overwrites
+ * that state, and part of it depends on what sort of texture is bound.
+ * Both also have to be redone because ati_fs may have been disabled and
+ * enabled again, which would otherwise need tracking.
+ * The arithmetic instructions are only re-encoded when the current shader
+ * is not the one encoded last.  (The encoded form should probably live in
+ * some DriverData object attached to the mesa atifs object, so that binding
+ * a shader would not force a "recompile".)
+ */
+void r200UpdateFragmentShader( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   r200UpdateFSConstants( ctx );
+   r200UpdateFSRouting( ctx );
+
+   /* instructions for this shader are already in the afs atoms */
+   if (rmesa->afs_loaded == ctx->ATIFragmentShader.Current)
+      return;
+
+   r200UpdateFSArith( ctx );
+}
index 592d46c..ff4b859 100644 (file)
@@ -94,6 +94,7 @@ extern const struct dri_extension card_extensions[];
 extern const struct dri_extension blend_extensions[];
 extern const struct dri_extension ARB_vp_extension[];
 extern const struct dri_extension NV_vp_extension[];
+extern const struct dri_extension ATI_fs_extension[];
 
 #if 1
 /* Including xf86PciInfo.h introduces a bunch of errors...
@@ -357,8 +358,9 @@ r200CreateScreen( __DRIscreenPrivate *sPriv )
         /* Check if kernel module is new enough to support blend color and
            separate blend functions/equations */
         screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11);
-
         screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16);
+        screen->drmSupportsFragShader = (sPriv->drmMinor >= 18);
+
       }
       /* Check if ddx has set up a surface reg to cover depth buffer */
       screen->depthHasSurface = (sPriv->ddxMajor > 4);
@@ -711,6 +713,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc
       driInitExtensions( NULL, blend_extensions, GL_FALSE );
       driInitSingleExtension( NULL, ARB_vp_extension );
       driInitSingleExtension( NULL, NV_vp_extension );
+      driInitSingleExtension( NULL, ATI_fs_extension );
    }
 
    return (void *) psp;
index eb88797..47e6f2b 100644 (file)
@@ -94,9 +94,10 @@ typedef struct {
    unsigned int gart_texture_offset;   /* offset in card memory space */
    unsigned int gart_base;
 
-   GLboolean drmSupportsCubeMaps;       /* need radeon kernel module >=1.7 */
+   GLboolean drmSupportsCubeMaps;       /* need radeon kernel module >= 1.7 */
    GLboolean drmSupportsBlendColor;     /* need radeon kernel module >= 1.11 */
    GLboolean drmSupportsTriPerf;        /* need radeon kernel module >= 1.16 */
+   GLboolean drmSupportsFragShader;     /* need radeon kernel module >= 1.18 */
    GLboolean depthHasSurface;
 
    /* Configuration cache with default values for all contexts */
index 43126b3..9687836 100644 (file)
@@ -2087,6 +2087,34 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state);
       break;
 
+   case GL_FRAGMENT_SHADER_ATI:
+      if ( !state ) {
+        /* restore normal tex env colors, make sure tex env combine will get updated,
+           mark env atoms dirty (as their data was overwritten by afs even
+           if they didn't change), and restore tex coord routing */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+         }
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+      }
+      else {
+        /* need to mark this dirty as pix/tf atoms have overwritten the data
+           even if the data in the atoms didn't change */
+        R200_STATECHANGE( rmesa, atf );
+        R200_STATECHANGE( rmesa, afs[1] );
+        /* everything else picked up in r200UpdateTextureState hopefully */
+      }
+      break;
    default:
       return;
    }
@@ -2260,7 +2288,7 @@ void r200ValidateState( GLcontext *ctx )
      r200UpdateDrawBuffer(ctx);
    }
 
-   if (new_state & _NEW_TEXTURE) {
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
       r200UpdateTextureState( ctx );
       new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
    }
@@ -2282,7 +2310,7 @@ void r200ValidateState( GLcontext *ctx )
     */
    if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
       update_texturematrix( ctx );
-   }      
+   }
 
    if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
       update_light( ctx );
index 27c0b94..db78afd 100644 (file)
@@ -137,9 +137,13 @@ static GLboolean check_##NM( GLcontext *ctx, int idx )     \
 CHECK( always, GL_TRUE )
 CHECK( never, GL_FALSE )
 CHECK( tex_any, ctx->Texture._EnabledUnits )
-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded))
+CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
+CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
 CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
-CHECK( texenv, rmesa->state.envneeded & (1 << idx) )
+CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
+CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
+CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
+CHECK( afs, ctx->ATIFragmentShader._Enabled )
 CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
 CHECK( fog, ctx->Fog.Enabled )
 TCL_CHECK( tcl, GL_TRUE )
@@ -229,8 +233,8 @@ void r200InitState( r200ContextPtr rmesa )
       rmesa->hw.ATOM.dirty = GL_FALSE;                         \
       rmesa->hw.max_state_size += SZ * sizeof(int);            \
    } while (0)
-      
-      
+
+
    /* Allocate state buffers:
     */
    if (rmesa->r200Screen->drmSupportsBlendColor)
@@ -247,22 +251,46 @@ void r200InitState( r200ContextPtr rmesa )
    ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
-   ALLOC_STATE( tf, tex_any, TF_STATE_SIZE, "TF/tfactor", 0 );
-   if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
-   /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
-      ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE, "TEX/tex-0", 0 );
-      ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE, "TEX/tex-1", 1 );
-      ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+   ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
+   if (rmesa->r200Screen->drmSupportsFragShader) {
+      if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
+      /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+        ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+        ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+        ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+      }
+      else {
+        ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+        ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+        ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+      }
+      ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 );
+      ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 );
+      ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 );
+      ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 );
+      ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+      ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+      ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
    }
    else {
-      ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE, "TEX/tex-0", 0 );
-      ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE, "TEX/tex-1", 1 );
-      ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+      if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
+        ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+        ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+        ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+      }
+      else {
+        ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+        ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+        ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+      }
+      ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 );
+      ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 );
+      ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 );
+      ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 );
+      ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 );
+      ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+      ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
    }
-   ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE, "TEX/tex-2", 2 );
-   ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE, "TEX/tex-3", 3 );
-   ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE, "TEX/tex-4", 4 );
-   ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE, "TEX/tex-5", 5 );
    if (rmesa->r200Screen->drmSupportsCubeMaps) {
       ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
       ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
@@ -312,7 +340,7 @@ void r200InitState( r200ContextPtr rmesa )
    ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
    ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
    ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
-   ALLOC_STATE( pix[0], always, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
+   ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
    ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
    ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
@@ -348,19 +376,37 @@ void r200InitState( r200ContextPtr rmesa )
    rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
    rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
    rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
-   rmesa->hw.tf.cmd[TF_CMD_0]   = cmdpkt(R200_EMIT_TFACTOR_0);
-   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
-   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
-   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
-   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
-   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
-   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
-   rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
-   rmesa->hw.tex[3].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
-   rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
-   rmesa->hw.tex[4].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
-   rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
-   rmesa->hw.tex[5].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
+   if (rmesa->r200Screen->drmSupportsFragShader) {
+      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
+      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
+      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
+      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
+      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
+      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
+      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+   } else {
+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
+      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
+      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
+      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
+      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
+      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
+      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+   }
+   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
+   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
    rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
    rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
    rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
@@ -623,12 +669,20 @@ void r200InitState( r200ContextPtr rmesa )
          ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) |  /* <-- note i */
           (2 << R200_TXFORMAT_WIDTH_SHIFT) |
           (2 << R200_TXFORMAT_HEIGHT_SHIFT));
-      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
          (/* R200_TEXCOORD_PROJ | */
           0x100000);   /* Small default bias */
+      if (rmesa->r200Screen->drmSupportsFragShader) {
+        rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
+            rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+        rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
+        rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
+      }
+      else {
+         rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
+            rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+     }
 
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
index acabbc1..4438cc0 100644 (file)
@@ -44,4 +44,8 @@ extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
 
 extern void r200InitTextureFuncs( struct dd_function_table *functions );
 
+extern void r200UpdateFragmentShader( GLcontext *ctx );
+
+extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d );
+
 #endif /* __R200_TEX_H__ */
index b79f3ac..db47f6f 100644 (file)
@@ -315,6 +315,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
       ASSERT(log2Width == log2Height);
       t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
                          (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+/* don't think we need this bit, if it exists at all - fglrx does not set it */
                          (R200_TXFORMAT_CUBIC_MAP_ENABLE));
       t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
       t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
@@ -591,7 +592,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
            break;
         case GL_PREVIOUS:
            if (replaceargs != unit) {
-              const GLint srcRGBreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
+              const GLint srcRGBreplace =
+                 ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
               if (op >= 2) {
                  op = op ^ replaceopa;
               }
@@ -612,7 +614,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
                  if (slot == 0)
                     color_arg[i] = r200_primary_color[op];
                  else
-                    color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
+                    color_arg[i] = r200_register_color[op]
+                       [rmesa->state.texture.unit[replaceargs - 1].outputreg];
                  break;
               case GL_ZERO:
                  color_arg[i] = r200_zero_color[op];
@@ -636,7 +639,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
               if (slot == 0)
                  color_arg[i] = r200_primary_color[op];
               else
-                 color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[unit - 1].outputreg];
+                 color_arg[i] = r200_register_color[op]
+                    [rmesa->state.texture.unit[unit - 1].outputreg];
             }
            break;
         case GL_ZERO:
@@ -675,7 +679,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
            break;
         case GL_PREVIOUS:
            if (replaceargs != unit) {
-              const GLint srcAreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
+              const GLint srcAreplace =
+                 ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
               op = op ^ replaceopa;
               switch (srcAreplace) {
               case GL_TEXTURE:
@@ -691,7 +696,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
                  if (slot == 0)
                     alpha_arg[i] = r200_primary_alpha[op];
                  else
-                    alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
+                    alpha_arg[i] = r200_register_alpha[op]
+                       [rmesa->state.texture.unit[replaceargs - 1].outputreg];
                  break;
               case GL_ZERO:
                  alpha_arg[i] = r200_zero_alpha[op];
@@ -715,7 +721,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
               if (slot == 0)
                  alpha_arg[i] = r200_primary_alpha[op];
               else
-                 alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[unit - 1].outputreg];
+                 alpha_arg[i] = r200_register_alpha[op]
+                   [rmesa->state.texture.unit[unit - 1].outputreg];
             }
            break;
         case GL_ZERO:
@@ -1091,7 +1098,7 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
    }
 
    R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_TEX_BLEND_ENABLE_MASK;
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
    rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
 
    return ok;
@@ -1114,11 +1121,11 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
 #define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK |       \
                              R200_TXFORMAT_HEIGHT_MASK |       \
                              R200_TXFORMAT_FORMAT_MASK |       \
-                              R200_TXFORMAT_F5_WIDTH_MASK |    \
-                              R200_TXFORMAT_F5_HEIGHT_MASK |   \
+                             R200_TXFORMAT_F5_WIDTH_MASK |     \
+                             R200_TXFORMAT_F5_HEIGHT_MASK |    \
                              R200_TXFORMAT_ALPHA_IN_MAP |      \
                              R200_TXFORMAT_CUBIC_MAP_ENABLE |  \
-                              R200_TXFORMAT_NON_POWER2)
+                             R200_TXFORMAT_NON_POWER2)
 
 #define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK |         \
                                 R200_TEXCOORD_MASK |           \
@@ -1140,15 +1147,24 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
    cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
    cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
    cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
-   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
-   R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+   if (rmesa->r200Screen->drmSupportsFragShader) {
+      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
+   }
+   else {
+      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
+   }
 
    if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
       GLuint *cube_cmd = R200_DB_STATE( cube[unit] );
       GLuint bytesPerFace = texobj->base.totalSize / 6;
       ASSERT(texobj->base.totalSize % 6 == 0);
       cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      if (rmesa->r200Screen->drmSupportsFragShader) {
+        /* this value is submitted twice (tex atom and cube atom). could change the
+           cube atom to not include that command when the new drm is used */
+        cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      }
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
@@ -1156,6 +1172,7 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
       R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
    }
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
 
    texobj->dirty_state &= ~(1<<unit);
 }
@@ -1378,7 +1395,7 @@ static void disable_tex( GLcontext *ctx, int unit )
    }
 }
 
-static void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
@@ -1596,26 +1613,27 @@ static GLboolean update_tex_common( GLcontext *ctx, int unit )
 static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
 
-   if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_RECT_BIT) ) {
+   if ( unitneeded & (TEXTURE_RECT_BIT) ) {
       return (enable_tex_rect( ctx, unit ) &&
              update_tex_common( ctx, unit ));
    }
-   else if (  rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+   else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
       return (enable_tex_2d( ctx, unit ) &&
              update_tex_common( ctx, unit ));
    }
 #if ENABLE_HW_3D_TEXTURE
-   else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_3D_BIT) ) {
+   else if ( unitneeded & (TEXTURE_3D_BIT) ) {
       return (enable_tex_3d( ctx, unit ) &&
              update_tex_common( ctx, unit ));
    }
 #endif
-   else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_CUBE_BIT) ) {
+   else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
       return (enable_tex_cube( ctx, unit ) &&
              update_tex_common( ctx, unit ));
    }
-   else if ( rmesa->state.texture.unit[unit].unitneeded ) {
+   else if ( unitneeded ) {
       return GL_FALSE;
    }
    else {
@@ -1631,8 +1649,16 @@ void r200UpdateTextureState( GLcontext *ctx )
    GLboolean ok;
    GLuint dbg;
 
-   ok = r200UpdateAllTexEnv( ctx );
-
+   if (ctx->ATIFragmentShader._Enabled) {
+      GLuint i;
+      for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+        rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
+      }
+      ok = GL_TRUE;
+   }
+   else {
+      ok = r200UpdateAllTexEnv( ctx );
+   }
    if (ok) {
       ok = (r200UpdateTextureUnit( ctx, 0 ) &&
         r200UpdateTextureUnit( ctx, 1 ) &&
@@ -1642,6 +1668,10 @@ void r200UpdateTextureState( GLcontext *ctx )
         r200UpdateTextureUnit( ctx, 5 ));
    }
 
+   if (ok && ctx->ATIFragmentShader._Enabled) {
+      r200UpdateFragmentShader(ctx);
+   }
+
    FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
 
    if (rmesa->TclFallback)
@@ -1652,24 +1682,38 @@ void r200UpdateTextureState( GLcontext *ctx )
 
       /*
        * T0 hang workaround -------------
-       * not needed for r200 derivatives?
-       */
+       * not needed for r200 derivatives
+        */
       if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
-         (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
-
-         R200_STATECHANGE(rmesa, ctx);
-         R200_STATECHANGE(rmesa, tex[1]);
-         rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
-         rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-         rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
+        (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+        R200_STATECHANGE(rmesa, ctx);
+        R200_STATECHANGE(rmesa, tex[1]);
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+        if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+           rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+        rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
       }
-      else {
-         if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
-            (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
-               R200_STATECHANGE(rmesa, tex[1]);
-               rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+      else if (!ctx->ATIFragmentShader._Enabled) {
+        if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+           (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
+           R200_STATECHANGE(rmesa, tex[1]);
+           rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
          }
       }
+      /* apply the same workaround for the first pass of a fragment shader.
+       * it is unknown whether this is necessary or sufficient.
+       */
+      if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
+        (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+        R200_STATECHANGE(rmesa, cst);
+        R200_STATECHANGE(rmesa, tex[1]);
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
+        if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
+           rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+        rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+      }
 
       /* maybe needs to be done pairwise due to 2 parallel (physical) tex units ?
          looks like that's not the case, if 8500/9100 owners don't complain remove this...
@@ -1695,7 +1739,8 @@ void r200UpdateTextureState( GLcontext *ctx )
 
       /*
        * Texture cache LRU hang workaround -------------
-       * not needed for r200 derivatives?
+       * not needed for r200 derivatives
+       * hopefully this covers first pass of a shader as well
        */
 
       /* While the cases below attempt to only enable the workaround in the